From db556bd97597387ec3ab7c4c006d5e0292fae147 Mon Sep 17 00:00:00 2001 From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:51:59 +0530 Subject: [PATCH 01/12] Update README.md --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8d05f10fa..79008efdf 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,11 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b ### Local deployment #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. +The models offered in each environment are configured through the VITE_LLM_MODELS_PROD variable; set it to the list of models you need. +Example: +```env +VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" +``` In your root folder, create a .env file with your OPENAI and DIFFBOT keys (if you want to use both): ```env @@ -72,7 +77,7 @@ You can of course combine all (local, youtube, wikipedia, s3 and gcs) or remove ### Chat Modes -By default,all of the chat modes will be available: vector, graph+vector and graph. +By default, all of the chat modes will be available: vector, graph_vector, graph, fulltext, graph_vector_fulltext, entity_vector and global_vector. 
If none of the mode is mentioned in the chat modes variable all modes will be available: ```env VITE_CHAT_MODES="" @@ -80,7 +85,7 @@ VITE_CHAT_MODES="" If however you want to specify the only vector mode or only graph mode you can do that by specifying the mode in the env: ```env -VITE_CHAT_MODES="vector,graph+vector" +VITE_CHAT_MODES="vector,graph" ``` #### Running Backend and Frontend separately (dev environment) @@ -150,12 +155,14 @@ Allow unauthenticated request : Yes | VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | +| VITE_LLM_MODELS_PROD | Optional | openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash | To distinguish models based on the environment (PROD or DEV) | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | | RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | + ## For local llms (Ollama) 1. 
Pull the docker imgage of ollama ```bash From dfa56b387992636a21b90851a9b5b160dc5ac084 Mon Sep 17 00:00:00 2001 From: aashipandya <156318202+aashipandya@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:05:17 +0530 Subject: [PATCH 02/12] Dev to staging (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * processing count updated on cancel * format fixes * remove whitespace for enviroment variable which due to an error "xxx may not contain whitespace" (#707) * updated disconnected nodes * updated disconnected nodes * fix: Processed count update on failed condition * added disconnected and up nodes * removed __Entity__ labels * removed graph_object * removed graph object in the function * resetting the alert message on success scenario * Modified queries * populate graph schema * not clearing the password when there is error scenario * fixed the vector index loading issue * fix: empty credentials payload for recreate vector index api * chatbot status (#676) * chatbot status * connection status check for ASK button * refresh disable check * review comment resolved * format fixes * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema 
indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 
sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * 
conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani… * modified the summary creation * fixed the summary creation * Configuration change. Update LLM models and remove --preload from docker file * Retry processing (#698) * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Reapply "Dockerfile changes with VITE label" This reverts commit a83e0855fbf54d2b5af009d96c4edf0bcd7ab84a. * Revert "Dockerfile changes with VITE label" This reverts commit 2840ebc9e6156c51465a9f54be72ca2d014147c2. 
* Concurrent processing of files (#665) * Update README.md * Droped the old vector index (#652) * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * Added chatbot "hybrid " mode use case * added the concurrent file processing * page refresh scenario * fixed waiting files processing issue in refresh scenario * removed boolean param * fixed processedCount issue * checkbox with waiting check * fixed the refresh scenario with processing files * processing files check * server side error * processing file count check for processing files less than batch size * processing count check to handle allselected files * created helper functions * code improvements * __ changes (#656) * DiffbotGraphTransformer doesn't need an LLMGraphTransformer (#659) Co-authored-by: jeromechoo * Removed experiments/llm-chatbot-python folder from DEV branch * redcued the password clear timeout * Removed experiments/Cypher_Queries.ipynb file from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * update delete query with entities * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * add BATCH_SIZE to docker * temp fixes for 0.3.1 * alert fix for less than batch size processing * new virtual env * added Hybrid Chat modes (#670) * Rename the function #657 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * Graph node filename check * env fixes with latest nvl libraries * format fixes * removed local files * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without 
embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes 
* chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * page refresh scenario * fixed waiting files processing issue in refresh scenario * Removed experiments/llm-chatbot-python folder from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes 
* env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for 
create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * property spell fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env changes * format fixes * set retry status * retry processing backend * added the retry icon on rows * vite 
changes in docker compose * added retry dialog * Integrated the Retry processing API * Integrated the Extract API for retry processing * Integrated ndl toast component * replaced foreach with normal for loop for better performance * types improvements * used toast component * spell fix * Issue fixed * processing changes in main * function closing fix * retry processing issue fixed * autoclosing the retry popup on retry api success * removed the retry if check * resetting the node and relationship count on retry * added the enter key events on the popups * fixed wikipedia icon on large file alert popup * setting nodes to 0 and start from last processed chunk logic changes * Retry Popup fixes * status changes for upload failed scenario * kept condition specific * changed status to reprocess from retry * Reprocess wording changes * tooltip changes * wordings and size changes * Changed status to Reprocess --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * ref added for keydown (#717) * ref added for keydown * enter key changes * tooltip changes * ishover state * theme fix * removal of handleKeydown * format fix * Remove total_pages property. It is not used in DB. 
(#714) Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * Update main.py * Add print statement for document status * refactored qa integration * allow credentials true changes * reset the values to 0 when the retry option is start from beginning * Update Node/Document status using SSE, Trying to fix Cancelled by Scope error * modified the chat mode settings * renamed QA_integration * added graphdatascience * moved settings to constants * modified constants * resetting the nodescount and relationshipcount * Add vector index exist condition to create * Science Molecule & database icon addition (#722) * DataScience icon addition * added gds status to connect call * icon stroke changes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Add communities check and show respective chat modes (#729) * DataScience icon addition * added checkbox to create_communities * added conditional check for community chat modes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * youtube transcript issue (#736) * Implement GoogleApiYoutubeLoader for youtube transcript * Url encoding issue fixed for youtube URL * Get transcript using YouTubeTranscriptApi * added changes to graph schema * added local search * 725 add checkbox for create communities (#728) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditional check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * modified local search query * added entity details for chat * modified chunkids * modified chunk_entities * Add communities Checkbox to 
graph viz (#739) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditional check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query * checkbox addition * filter logic * filter logic missing checks * updated_graph_query * filter logic optimised * gds active check * handle checkbox show --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Added time to process file in extract API and its functions * Add Severity level in cloud logging * changed name * modified the query * updated entities param * added document to details * format fixes * added global embedding * added database * added database * modified is_entity * modifies chunk entities * Added secweb to fix security issues * removed QA integration * created neo4j from existing index * modified script * Integrate local search to chat details (#746) * added the communities tab * removed unused variables * removed scipy library * added the mode check * Integrated the communities tab * added the check * enabled the top entities mode * tabs order rearrange * added the loader to sources tab for entity search+vector * fixed the chat mode per prop --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removal of unused code * removed entity label * Added Description to chat mode menu (#743) * added tooltips to chat mode menu * addition of description to menu * 741-tooltips-to-selectOptions * menu changes * communities name change * close changes * name changes * format fixes * Update log_struct method to add severity * community check * Entity Empty Label fix and Icon * Update Utils.ts * Retry processing - node and rels count update condition for start from beginning 
(#737) * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Reapply "Dockerfile changes with VITE label" This reverts commit a83e0855fbf54d2b5af009d96c4edf0bcd7ab84a. * Revert "Dockerfile changes with VITE label" This reverts commit 2840ebc9e6156c51465a9f54be72ca2d014147c2. 
* Concurrent processing of files (#665) * Update README.md * Droped the old vector index (#652) * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * Added chatbot "hybrid " mode use case * added the concurrent file processing * page refresh scenario * fixed waiting files processing issue in refresh scenario * removed boolean param * fixed processedCount issue * checkbox with waiting check * fixed the refresh scenario with processing files * processing files check * server side error * processing file count check for processing files less than batch size * processing count check to handle allselected files * created helper functions * code improvements * __ changes (#656) * DiffbotGraphTransformer doesn't need an LLMGraphTransformer (#659) Co-authored-by: jeromechoo * Removed experiments/llm-chatbot-python folder from DEV branch * redcued the password clear timeout * Removed experiments/Cypher_Queries.ipynb file from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * update delete query with entities * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * add BATCH_SIZE to docker * temp fixes for 0.3.1 * alert fix for less than batch size processing * new virtual env * added Hybrid Chat modes (#670) * Rename the function #657 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * Graph node filename check * env fixes with latest nvl libraries * format fixes * removed local files * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without 
embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes 
* chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * page refresh scenario * fixed waiting files processing issue in refresh scenario * Removed experiments/llm-chatbot-python folder from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes 
* env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for 
create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * property spell fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env changes * format fixes * set retry status * retry processing backend * added the retry icon on rows * vite 
changes in docker compose * added retry dialog * Integrated the Retry processing API * Integrated the Extract API fro retry processing * Integrated ndl toast component * replaced foreach with normal for loop for better performance * types improvements * used toast component * spell fix * Issue fixed * processing changes in main * function closing fix * retry processing issue fixed * autoclosing the retry popup on retry api success * removed the retry if check * resetting the node and relationship count on retry * added the enter key events on the popups * fixed wikipedia icon on large file alert popup * setting nodes to 0 and start from last processed chunk logic changes * Retry Popup fixes * status changes for upload failed scenario * kept condition specific * changed status to reprocess from retry * Reprocess wording changes * tooltip changes * wordings and size changes * Changed status to Reprocess * updated node count for start from begnning --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo * uncommented the Retry Processing * removed __Entity__ labels * spell fix * fixed postprocessing method invoking issue for odd no files * lint fix * Added filesource and name in chunks * Preload=True remove from HSTS * Graph communities (#748) * UI changes * modes enable disable * separated sources entities chunk communities * communities added into separate component * Update ChatInfoModal.tsx * added filename and source for chunksinfo * removed the console.log * mode disable changes --------- 
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * aria label addition * code improvements used URL class for host url check * host level check * Update Security header * encryption of localstorage values * 'mode-selection-changes' * added local chat history * added neo4j from existing index to entity vector mode * label changes * commented security header * Communities (#721) * added communities creation * added communities * removed tqdm * removed __Entity__ labels * removed graph_object * removed graph object in the function * Modified queries * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> 
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node 
properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * 
Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable.
* timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file 
re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the 
default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the 
content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * 
added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and 
gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the 
TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed 
validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status 
message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey 
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks 
create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and 
youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith 
+ fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input 
(#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata 
(#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users… * added global env for communities * comment all security header * added threading to chat summarization to improve chat response time (#751) * formatted the queries and added logic for empty label (#752) * Commented youtube google api code * added the error handling for passowrd decrypt error * wordings changes * Exclude default labels from get_labels_and_relationtypes * Post-Processing-Alerts (#758) * added the alerts before and after the post processing * Tooltip changes * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query 
--------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * Langchain libs update (#769) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * fixed the rerendering of the table while file status is processing * fix: Read Only User Fix * Global search fulltext (#767) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * payload changes * added nodetails properties * payload new changes * communities check * communities selection check * Communities bug solutions (#770) * added local chat history * added write access check * added write access param * labels change for nodes * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test updated * enable communities * removed the selected prop * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * enable communities label change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * readonly fixed on refresh * clear chat history * selectedFiles check for Chatbot * clear history --------- Co-authored-by: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * Added elapsed time for extraction on each breakdown function * lint and format fixes * removed dev logs * communities fix * disabled the generate graph for read only user * format fixes * graph labels change * added the readonly check for already added waiting files * Retriever evaluation using RAGAS * deleted unused file * code optimization using memo * Added elapsed_time on each api and getting time per_entity * Added the post processing Alert showcasing the ongoing post processing jobs * fix: readonly user retry option disable * update script to get details of extracted doc * Issue fixed, Latency count per entity * Multiple chat modes selection (#780) * added Multi modes selection * multimodes state management * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch state management * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * fix: Handled activespeech speech and othermessage modes switch * used requestAnimationFrame instead of setTimeout * removed the commented code * Fix: ChatModes DeSelection on File Selection * Fix: Order of the chatmodes according to selected chatmodes * Community optimization (#790) * modified leidens parameters * updated disconnected nodes query * excluded communities from dedup * added index creation * modified de dup query * added delete query for communities * Async way to create entities from multiple chunks (#788) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * async way to get graph documents * indentation correction * fixed graph mode error (#792) * Raga's Evaluation Metrics (#787) * added Multi modes
selection * ragas eval * added response * multimodes state management * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch state management * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * ragas evaluation metric show * Output return type changed * fix: Handled activespeech speech and othermessage modes switch * used requestAnimationFrame instead of setTimeout * removed the commented code * Added ragas to requirements * Integrated the metric api * ragas response updated, llm list updated * resolved syntax error in score * Added the Metrics Table * fix: Long text UI Issue * code optimization for evaluation * added the download button for downloading the info * key name change * Optimized the downloadClickHandler --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna * Openai gemini config (#794) * openai and gemini models as config backend * updated dropdown llm values * updated docs * Added the user action for metrics table * Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes * format changes * Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * labels change for nodes * payload changes * added nodetails properties * payload new changes * communities check * communities selection check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * llm name changes * build fix * default mode fix * ragas model names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported lllms for ragas metric loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix: Fixed the details state handling between multiple chats feature: Added the warning banner If selected llm model is not supported for raga's evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) * added congig variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optinal to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm models for ragas evaluation * Fix: Communitites Tab is displayed based communitites length * added the conversation download button (#800) * model name correction * chatmode switch mode fix * Add API payload GCP logging (#805) * Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Dcoumrnt node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * added error message for doc retriver (#807) * copy row (#803) * copy row * column for copy * column copy * Raga's Evaluation For Multi Modes (#806) * 
Updatedmodels for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evalution For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling fix: lint fixes * fix: Multimode metrics mode change state issue fix: chunk list style issue * fix: list style fix * Correct TYPO mistake * added new env for ragas embedding model * Props name changes (#811) * Props name changes * removed the accesstoken from row on copy action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * test * view graph * nodes count and relationshipcount updation fix * sourceUrl Fix * empty string "" fix to keep the default values we should keep the value blank instead "" * prop changes * props changes * retry condition update for failed files (#820) * Chat modes name changes (#815) * Props name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readble format In UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update script for async func * ragas changes for graph retrieval mode. 
context added in api output (#825) * Remove extract latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api * Update README.md * updated api structire in docs (#827) * Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session lable node from duplicate nodes list * Added the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude __Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * fixed the youtube link * Security header and GZIPMiddleware (#847) * Added security header all API * Add GZipMiddleware * Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text * integrated the service layer for chunkdata * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api , community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat 
mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling 
(#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the 
selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file 
source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, 
was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist 
error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * 
Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog 
* Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- "soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Me… * disconnected nodes (#852) * loading changes * Update score.py * added middleware * removed the unused state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: edenbuaa Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Pravesh1988 Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Prakriti Solankey Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna Co-authored-by: Jayanth T Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> --- README.md | 5 +- backend/score.py | 100 ++++++++++++++++-- backend/src/communities.py | 49 ++++++--- backend/src/graphDB_dataAccess.py | 2 +- backend/src/graph_query.py | 33 +++++- backend/src/neighbours.py | 3 +- backend/src/shared/constants.py | 15 ++- backend/test_integrationqa.py | 21 ++++ docker-compose.yml | 2 +- example.env | 1 - frontend/Dockerfile | 1 - frontend/src/components/ChatBot/ChunkInfo.tsx | 2 +- .../components/ChatBot/CommunitiesInfo.tsx | 2 +- .../src/components/ChatBot/EntitiesInfo.tsx | 2 +- .../src/components/ChatBot/SourcesInfo.tsx | 2 +- frontend/src/components/ChatBot/chatInfo.ts | 2 +- 
frontend/src/components/Content.tsx | 67 ++++++++++-- frontend/src/components/FileTable.tsx | 24 ++++- .../src/components/Graph/GraphViewModal.tsx | 6 +- frontend/src/components/Layout/PageLayout.tsx | 1 - .../components/Popups/ChunkPopUp/index.tsx | 72 +++++++++++++ .../ConnectionModal/ConnectionModal.tsx | 2 +- .../Deduplication/index.tsx | 2 +- .../DeleteTabForOrphanNodes/index.tsx | 2 +- frontend/src/components/QuickStarter.tsx | 16 +-- frontend/src/services/getChunkText.ts | 19 ++++ frontend/src/types.ts | 26 ++++- frontend/src/utils/Constants.ts | 99 ++++++++--------- frontend/src/utils/Utils.ts | 7 +- 29 files changed, 466 insertions(+), 119 deletions(-) create mode 100644 frontend/src/components/Popups/ChunkPopUp/index.tsx create mode 100644 frontend/src/services/getChunkText.ts diff --git a/README.md b/README.md index 79008efdf..fadf6eee9 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,13 @@ DIFFBOT_API_KEY="your-diffbot-key" if you only want OpenAI: ```env -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" +VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" OPENAI_API_KEY="your-openai-key" ``` if you only want Diffbot: ```env -VITE_LLM_MODELS="diffbot" +VITE_LLM_MODELS_PROD="diffbot" DIFFBOT_API_KEY="your-diffbot-key" ``` @@ -149,7 +149,6 @@ Allow unauthenticated request : Yes | VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | | VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | | VITE_REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available | -| VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A | VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for 
Q&A | VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app | | VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | diff --git a/backend/score.py b/backend/score.py index f7b8c4082..03fb75de9 100644 --- a/backend/score.py +++ b/backend/score.py @@ -12,14 +12,14 @@ from langchain_google_vertexai import ChatVertexAI from src.api_response import create_api_response from src.graphDB_dataAccess import graphDBdataAccess -from src.graph_query import get_graph_results +from src.graph_query import get_graph_results,get_chunktext_results from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import create_communities from src.neighbours import get_neighbour_nodes import json -from typing import List, Mapping +from typing import List, Mapping, Union from starlette.middleware.sessions import SessionMiddleware import google_auth_oauthlib.flow from google.oauth2.credentials import Credentials @@ -33,8 +33,10 @@ from Secweb.ContentSecurityPolicy import ContentSecurityPolicy from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame - +from fastapi.middleware.gzip import GZipMiddleware from src.ragas_eval import * +from starlette.types import ASGIApp, Message, Receive, Scope, Send +import gzip logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -49,14 +51,42 @@ def healthy(): def sick(): return False - +class CustomGZipMiddleware: + def __init__( + self, + app: ASGIApp, + paths: List[str], + minimum_size: int = 1000, + compresslevel: int = 5 + ): + self.app = app + self.paths = paths + self.minimum_size = minimum_size + self.compresslevel = compresslevel + + async def __call__(self, scope: Scope, receive: Receive, send: Send): + if scope["type"] != "http": + return await self.app(scope, receive, send) + + path = 
scope["path"] + should_compress = any(path.startswith(gzip_path) for gzip_path in self.paths) + + if not should_compress: + return await self.app(scope, receive, send) + + gzip_middleware = GZipMiddleware( + app=self.app, + minimum_size=self.minimum_size, + compresslevel=self.compresslevel + ) + await gzip_middleware(scope, receive, send) app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -# app.add_middleware(HSTS, Option={'max-age': 4}) -# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) -# app.add_middleware(XContentTypeOptions) -# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) - +app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +app.add_middleware(XContentTypeOptions) +app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) +#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) +app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -818,5 +848,57 @@ async def calculate_metric(question: str = Form(), finally: gc.collect() +@app.post("/fetch_chunktext") +async def fetch_chunktext( + uri: str = Form(), + database: str = Form(), + userName: str = Form(), + password: str = Form(), + document_name: str = Form(), + page_no: int = Form(1) +): + try: + payload_json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'userName': userName, + 'database': database, + 'document_name': document_name, + 'page_no': page_no, + 
'logging_time': formatted_time(datetime.now(timezone.utc)) + } + logger.log_struct(payload_json_obj, "INFO") + start = time.time() + result = await asyncio.to_thread( + get_chunktext_results, + uri=uri, + username=userName, + password=password, + database=database, + document_name=document_name, + page_no=page_no + ) + end = time.time() + elapsed_time = end - start + json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'document_name': document_name, + 'page_no': page_no, + 'logging_time': formatted_time(datetime.now(timezone.utc)), + 'elapsed_api_time': f'{elapsed_time:.2f}' + } + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=result, message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + job_status = "Failed" + message = "Unable to get chunk text response" + error_message = str(e) + logging.exception(f'Exception in fetch_chunktext: {error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + + if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/communities.py b/backend/src/communities.py index d1130150c..a38b39696 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -107,24 +107,38 @@ STORE_COMMUNITY_SUMMARIES = """ UNWIND $data AS row MERGE (c:__Community__ {id:row.community}) -SET c.summary = row.summary +SET c.summary = row.summary, + c.title = row.title """ + COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No pre-amble." 
-COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, -generate a natural language summary of the provided information: -{community_info} -Summary:""" +COMMUNITY_TEMPLATE = """ +Based on the provided nodes and relationships that belong to the same graph community, +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information +{community_info} +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. +""" PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information" PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community, -generate a natural language summary of the information.Include all the necessary information as possible +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information. Include all the necessary information as much as possible. + {community_info} -Summary:""" +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. 
+""" GET_COMMUNITY_DETAILS = """ @@ -277,8 +291,17 @@ def process_community_info(community, chain, is_parent=False): combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", []))) else: combined_text = prepare_string(community) - summary = chain.invoke({'community_info': combined_text}) - return {"community": community['communityId'], "summary": summary} + summary_response = chain.invoke({'community_info': combined_text}) + lines = summary_response.splitlines() + title = "Untitled Community" + summary = "" + for line in lines: + if line.lower().startswith("title"): + title = line.split(":", 1)[-1].strip() + elif line.lower().startswith("summary"): + summary = line.split(":", 1)[-1].strip() + logging.info(f"Community Title : {title}") + return {"community": community['communityId'], "title":title, "summary": summary} except Exception as e: logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") return None @@ -291,7 +314,7 @@ def create_community_summaries(gds, model): summaries = [] with ThreadPoolExecutor() as executor: futures = [executor.submit(process_community_info, community, community_chain) for community in community_info_list.to_dict(orient="records")] - + for future in as_completed(futures): result = future.result() if result: @@ -482,9 +505,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO logging.warning("Failed to write communities. 
Constraint was not applied.") except Exception as e: logging.error(f"Failed to create communities: {e}") - - - - - - diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 58834eb92..82391d1e4 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -354,7 +354,7 @@ def get_duplicate_nodes_list(self): score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE')) text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE')) query_duplicate_nodes = """ - MATCH (n:!Chunk&!Session&!Document&!`__Community__`) with n + MATCH (n:!Chunk&!Session&!Document&!`__Community__`&!`__Entity__`) with n WHERE n.embedding is not null and n.id is not null // and size(toString(n.id)) > 3 WITH n ORDER BY count {{ (n)--() }} DESC, size(toString(n.id)) DESC // updated WITH collect(n) as nodes diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index fb7333b48..86739ba6c 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -3,7 +3,7 @@ from neo4j import GraphDatabase import os import json -from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY +from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY # from neo4j.debug import watch # watch("neo4j") @@ -226,3 +226,34 @@ def get_graph_results(uri, username, password,database,document_names): driver.close() +def get_chunktext_results(uri, username, password, database, document_name, page_no): + """Retrieves chunk text, position, and page number from graph data with pagination.""" + try: + logging.info("Starting chunk text query process") + offset = 10 + skip = (page_no - 1) * offset + limit = offset + driver = GraphDatabase.driver(uri, auth=(username, password)) + with driver.session(database=database) as session: + total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name) + total_chunks = total_chunks_result.single()["total_chunks"] + total_pages = (total_chunks + 
offset - 1) // offset # Calculate total pages + records = session.run(CHUNK_TEXT_QUERY, file_name=document_name, skip=skip, limit=limit) + pageitems = [ + { + "text": record["chunk_text"], + "position": record["chunk_position"], + "pagenumber": record["page_number"] + } + for record in records + ] + logging.info(f"Query process completed with {len(pageitems)} chunks retrieved") + return { + "pageitems": pageitems, + "total_pages": total_pages + } + except Exception as e: + logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}") + raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e + finally: + driver.close() \ No newline at end of file diff --git a/backend/src/neighbours.py b/backend/src/neighbours.py index 08022ecc6..431d5b4bd 100644 --- a/backend/src/neighbours.py +++ b/backend/src/neighbours.py @@ -20,7 +20,8 @@ labels: [coalesce(apoc.coll.removeAll(labels(node), ['__Entity__'])[0], "*")], element_id: elementId(node), properties: { - id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END + id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END, + title: CASE WHEN node.title IS NOT NULL THEN node.title ELSE " " END } } ] AS nodes, diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b58fd3a67..084b5d1ba 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -161,6 +161,19 @@ ] AS entities """ +COUNT_CHUNKS_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN count(c) AS total_chunks +""" + +CHUNK_TEXT_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN c.text AS chunk_text, c.position AS chunk_position, c.page_number AS page_number +ORDER BY c.position +SKIP $skip +LIMIT $limit +""" + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 @@ -717,4 +730,4 @@ value "2023-03-15"." "## 5. 
Strict Compliance\n" "Adhere to the rules strictly. Non-compliance will result in termination." - """ \ No newline at end of file + """ diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 548e9706f..ede8077f7 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -124,6 +124,27 @@ def test_graph_website(model_name): print("Fail: ", e) return weburl_result +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result + + def test_graph_from_youtube_video(model_name): """Test graph creation from a YouTube video.""" source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' diff --git a/docker-compose.yml b/docker-compose.yml index ea6d2c050..8a0fdc4b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,7 +53,6 @@ services: args: - VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000} - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,wiki,s3} - - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-} - VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50} @@ -62,6 +61,7 @@ services: - VITE_ENV=${VITE_ENV-DEV} - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - 
VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} + - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} volumes: - ./frontend:/app diff --git a/example.env b/example.env index 6b542daf1..5d3a598c9 100644 --- a/example.env +++ b/example.env @@ -24,7 +24,6 @@ ENTITY_EMBEDDING=True VITE_BACKEND_API_URL="http://localhost:8000" VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3" VITE_ENV="DEV" VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 3053e1ba9..311294f4a 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -20,7 +20,6 @@ RUN yarn install COPY . ./ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_REACT_APP_SOURCES=$VITE_REACT_APP_SOURCES \ - VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \ VITE_BLOOM_URL=$VITE_BLOOM_URL \ VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \ diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 8568e0b5d..b58ee2c4d 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -261,4 +261,4 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ); }; -export default ChunkInfo; +export default ChunkInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index bc8e5e8d3..3c6899c5b 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -75,4 +75,4 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ); }; -export default 
CommunitiesInfo; +export default CommunitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 80e4fdafa..22eca8a57 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -146,4 +146,4 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ); }; -export default EntitiesInfo; +export default EntitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index a934913d9..ddfe92a6e 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -106,7 +106,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { )} {!link?.startsWith('s3://') && - !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) && ( + !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index c7e990ae7..1a229dc70 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -37,4 +37,4 @@ export const handleGraphNodeClick = async ( setLoadingGraphView(false); } } -}; +}; \ No newline at end of file diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 28caddc1c..8e8516666 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -11,6 +11,7 @@ import { CustomFile, OptionType, UserCredentials, + chunkdata, connectionState, } from '../types'; import deleteAPI from '../services/DeleteFiles'; @@ -44,6 +45,8 @@ import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; import { useMessageContext } from '../context/UserMessages'; import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast'; +import { getChunkText } from '../services/getChunkText'; +import ChunkPopUp from './Popups/ChunkPopUp'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -70,6 +73,7 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); + const [documentName, setDocumentName] = useState(''); const { setUserCredentials, userCredentials, @@ -85,6 +89,12 @@ const Content: React.FC = ({ const [retryFile, setRetryFile] = useState(''); const [retryLoading, setRetryLoading] = useState(false); const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false); + const [showChunkPopup, toggleChunkPopup] = useReducer((state) => !state, false); + const [chunksLoading, toggleChunksLoading] = useReducer((state) => !state, 
false); + const [currentPage, setCurrentPage] = useState(0); + const [totalPageCount, setTotalPageCount] = useState(null); + const [textChunks, setTextChunks] = useState([]); + const [alertStateForRetry, setAlertStateForRetry] = useState({ showAlert: false, alertType: 'neutral', @@ -107,11 +117,12 @@ const Content: React.FC = ({ setProcessedCount, setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); + const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`); @@ -122,7 +133,12 @@ const Content: React.FC = ({ } ); const childRef = useRef(null); - + const incrementPage = () => { + setCurrentPage((prev) => prev + 1); + }; + const decrementPage = () => { + setCurrentPage((prev) => prev - 1); + }; useEffect(() => { if (!init && !searchParams.has('connectURL')) { let session = localStorage.getItem('neo4j.connection'); @@ -149,7 +165,13 @@ const Content: React.FC = ({ setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } }, []); - + useEffect(() => { + if (currentPage >= 1) { + (async () => { + await getChunks(documentName, currentPage); + })(); + } + }, [currentPage, documentName]); useEffect(() => { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { @@ -251,7 +273,15 @@ const Content: React.FC = ({ setModel(selectedOption?.value); } }; - + const getChunks = async (name: string, pageNo: number) => { + toggleChunksLoading(); + const response = await getChunkText(userCredentials as UserCredentials, name, pageNo); + setTextChunks(response.data.data.pageitems); + if 
(!totalPageCount) { + setTotalPageCount(response.data.data.total_pages); + } + toggleChunksLoading(); + }; const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => { if (!isselectedRows) { const fileItem = filesData.find((f) => f.id == uid); @@ -497,7 +527,7 @@ const Content: React.FC = ({ } }; - function processWaitingFilesOnRefresh() { + const processWaitingFilesOnRefresh = () => { let data = []; const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length; @@ -517,7 +547,7 @@ const Content: React.FC = ({ .filter((f) => f.status === 'New' || f.status == 'Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } - } + }; const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; @@ -771,6 +801,18 @@ const Content: React.FC = ({ view='contentView' > )} + {showChunkPopup && ( + toggleChunkPopup()} + showChunkPopup={showChunkPopup} + chunks={textChunks} + incrementPage={incrementPage} + decrementPage={decrementPage} + currentPage={currentPage} + totalPageCount={totalPageCount} + > + )} {showEnhancementDialog && ( = ({ setRetryFile(id); toggleRetryPopup(); }} + onChunkView={async (name) => { + setDocumentName(name); + if (name != documentName) { + toggleChunkPopup(); + if (totalPageCount) { + setTotalPageCount(null); + } + setCurrentPage(1); + // await getChunks(name, 1); + } + }} ref={childRef} handleGenerateGraph={processWaitingFilesOnRefresh} > @@ -940,4 +993,4 @@ const Content: React.FC = ({ ); }; -export default Content; +export default Content; \ No newline at end of file diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index ec367b100..42b82572a 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -38,8 +38,9 @@ import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, import { useCredentials } from '../context/UserCredentials'; import { ArrowPathIconSolid, 
- ClipboardDocumentIconOutline, + ClipboardDocumentIconSolid, MagnifyingGlassCircleIconSolid, + DocumentTextIconSolid, } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; @@ -56,7 +57,7 @@ import { ThemeWrapperContext } from '../context/ThemeWrapper'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials, isReadOnlyUser } = useCredentials(); @@ -527,10 +528,25 @@ const FileTable = forwardRef((props, ref) => { handleCopy(copied); }} > - + + + { + onChunkView(info?.row?.original?.name as string); + }} + clean + placement='left' + label='chunktextaction' + text='View Chunks' + size='large' + disabled={info.getValue() === 'Uploading'} + > + ), + size: 300, + minSize: 180, header: () => Actions, footer: (info) => info.column.id, }), @@ -946,4 +962,4 @@ const FileTable = forwardRef((props, ref) => { ); }); -export default FileTable; +export default FileTable; \ No newline at end of file diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index e50f215d1..ac35a93ae 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -123,9 +123,7 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if (result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes - .map((f: Node) => f) - .filter((node: ExtendedNode) => node.labels.length === 1); + const neoNodes = result.data.data.nodes; const nodeIds = new Set(neoNodes.map((node: any) => 
node.element_id)); const neoRels = result.data.data.relationships .map((f: Relationship) => f) @@ -447,4 +445,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; +export default GraphViewModal; \ No newline at end of file diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 48e94c023..bf3082fc8 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -31,7 +31,6 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); diff --git a/frontend/src/components/Popups/ChunkPopUp/index.tsx b/frontend/src/components/Popups/ChunkPopUp/index.tsx new file mode 100644 index 000000000..7966ddd6e --- /dev/null +++ b/frontend/src/components/Popups/ChunkPopUp/index.tsx @@ -0,0 +1,72 @@ +import { Dialog, Typography, Flex, IconButton } from '@neo4j-ndl/react'; +import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons'; +import { chunkdata } from '../../../types'; +import Loader from '../../../utils/Loader'; +import { useMemo } from 'react'; + +const ChunkPopUp = ({ + showChunkPopup, + chunks, + onClose, + chunksLoading, + incrementPage, + decrementPage, + currentPage, + totalPageCount, +}: { + showChunkPopup: boolean; + chunks: chunkdata[]; + onClose: () => void; + chunksLoading: boolean; + incrementPage: () => void; + decrementPage: () => void; + currentPage: number | null; + totalPageCount: number | null; +}) => { + const sortedChunksData = useMemo(() => { + return chunks.sort((a, b) => a.position - b.position); + }, [chunks]); + return ( + + Text Chunks + + {chunksLoading ? ( + + ) : ( +
    + {sortedChunksData.map((c, idx) => ( +
  1. + + + Position : + {c.position} + + {c.pagenumber ? ( + + Page No :{' '} + {c.pagenumber} + + ) : null} + {c.text} + +
  2. + ))} +
+ )} +
+ {totalPageCount != null && totalPageCount > 1 && ( + + + + + + + + + + + )} +
+ ); +}; +export default ChunkPopUp; diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index ad7e7b48e..76df732a4 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -460,4 +460,4 @@ export default function ConnectionModal({ ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 7140bb6b2..330f5f657 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -352,4 +352,4 @@ export default function DeduplicationTab() { )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index bcc2597f1..6d4daae10 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -315,4 +315,4 @@ export default function DeletePopUpForOrphanNodes({ )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 1a4e169d2..4dc4b4a0a 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -19,14 +19,14 @@ const QuickStarter: React.FunctionComponent = () => { - -
- - + +
+ + diff --git a/frontend/src/services/getChunkText.ts b/frontend/src/services/getChunkText.ts new file mode 100644 index 000000000..f9825dc34 --- /dev/null +++ b/frontend/src/services/getChunkText.ts @@ -0,0 +1,19 @@ +import { UserCredentials, chunksData } from '../types'; +import api from '../API/Index'; + +export const getChunkText = async (userCredentials: UserCredentials, documentName: string, page_no: number) => { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + formData.append('document_name', documentName); + formData.append('page_no', page_no.toString()); + try { + const response = await api.post(`/fetch_chunktext`, formData); + return response; + } catch (error) { + console.log(error); + throw error; + } +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 02dbc0a2b..6ebf26b45 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -154,6 +154,7 @@ export interface FileTableProps { connectionStatus: boolean; setConnectionStatus: Dispatch>; onInspect: (id: string) => void; + onChunkView: (name: string) => void; handleGenerateGraph: () => void; onRetry: (id: string) => void; } @@ -379,7 +380,13 @@ export interface commonserverresponse { error?: string; message?: string | orphanTotalNodes; file_name?: string; - data?: labelsAndTypes | labelsAndTypes[] | uploadData | orphanNodeProps[] | dupNodes[]; + data?: + | labelsAndTypes + | labelsAndTypes[] + | uploadData + | orphanNodeProps[] + | dupNodes[] + | { pageitems: chunkdata[]; total_pages: number }; } export interface dupNodeProps { id: string; @@ -397,6 +404,11 @@ export interface selectedDuplicateNodes { firstElementId: string; similarElementIds: string[]; } +export interface chunkdata { + text: string; + position: number; + pagenumber: null | number; 
+} export interface ScehmaFromText extends Partial { data: labelsAndTypes; } @@ -407,6 +419,9 @@ export interface ServerData extends Partial { export interface duplicateNodesData extends Partial { data: dupNodes[]; } +export interface chunksData extends Partial { + data: { pageitems: chunkdata[]; total_pages: number }; +} export interface OrphanNodeResponse extends Partial { data: orphanNodeProps[]; } @@ -559,6 +574,10 @@ export interface MessagesContextProviderProps { children: ReactNode; } +export interface GraphContextProviderProps { + children: ReactNode; +} + export interface Chunk { id: string; position: number; @@ -715,6 +734,11 @@ export interface MessageContextType { setClearHistoryData: Dispatch>; } +export interface GraphContextType { + loadingGraph: boolean; + setLoadingGraph: Dispatch>; +} + export interface DatabaseStatusProps { isConnected: boolean; isGdsActive: boolean; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8d9289fd8..06d03d28b 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,26 +12,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai_gpt_3.5', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai_gpt_4o') ? 'openai_gpt_4o' : llms?.includes('gemini_1.5_pro') - ? 'gemini_1.5_pro' - : 'diffbot'; + ? 
'gemini_1.5_pro' + : 'diffbot'; export const supportedLLmsForRagas = [ 'openai_gpt_3.5', 'openai_gpt_4', @@ -76,40 +76,40 @@ export const chatModeReadableLables: Record = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables['graph+vector'], - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables['graph+vector+fulltext'], - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables['entity search+vector'], - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables['global search+vector+fulltext'], - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables['graph+vector'], + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: 
chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables['graph+vector+fulltext'], + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: chatModeLables['entity search+vector'], + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables['global search+vector+fulltext'], + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -291,6 +291,7 @@ export const graphLabels = { docChunk: 'Document & Chunk', community: 'Communities', noNodesRels: 'No Nodes and No relationships', + neighborView: 'neighborView' }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 6945c17ca..f407f21e0 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -130,6 +130,9 @@ export const getNodeCaption = (node: any) => { if (node.properties.fileName) { return node.properties.fileName; } + if(node.labels[0] === '__Community__'){ + return node.properties.title; + } return node.properties.id; }; @@ -156,11 +159,11 @@ export function extractPdfFileName(url: string): string { export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRelationship[]) => { const schemeVal: Scheme = {}; let iterator = 0; - const labels: string[] = neoNodes.map((f: any) => f.labels); + const labels: string[] = neoNodes.flatMap((f: any) => f.labels); for (let index = 0; index < labels.length; index++) { const label = labels[index]; if (schemeVal[label] == undefined) { - schemeVal[label] = 
calcWordColor(label[0]); + schemeVal[label] = calcWordColor(label); iterator += 1; } } From 5b1a683ec858e3db291d61de49493b8840bce613 Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:03:48 +0530 Subject: [PATCH 03/12] Dev (#918) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add communities Checkbox to graph viz (#739) * DataScience icon addition * added checkbox to create_communities * added gds status to connect call * added conditionall check for community chat modes * icon stroke changes * isgds active check * icon changes * isGdsVal change * format fixes * checkbox check uncheck change * graph query * checkbox addition * filter logic * filter logic missing checks * updated_graph_query * filter logic optimised * gds active check * handle checkbox show --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Added time to process file in extract API and it's functions * Add Severity level in cloud logging * changed name * modified the query * updated entities param * added document to details * format fixes * added global embedding * added database * added database * modified is_entity * modifies chunk entities * Added secweb to fix security issues * removed QA integration * created neo4j from existing index * modified script * Integrate local search to chat details (#746) * added the commuties tab * removed unused variables * removed scipy libarary * added the mode check * Integrated the communities tab * added the cjheck * enabled the top entities mode * tabs order rearange * added the loader to sources tab for entity search+vector * fixed the chat mode per prop --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removal of unused code * removed entity label * 
Added Description to chat mode menu (#743) * added tooltips to chat mode menu * addition of description to menu * 741-tooltips-to-selectOptions * menu changes * acommunities name change * close changes * name changes * format fixes * Update log_struct method to add severity * community check * Entity Empty Label fix and Icon * Update Utils.ts * Retry processing - node and rels count update condition for start from beginning (#737) * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Reapply "Dockerfile changes with VITE label" This reverts commit a83e0855fbf54d2b5af009d96c4edf0bcd7ab84a. * Revert "Dockerfile changes with VITE label" This reverts commit 2840ebc9e6156c51465a9f54be72ca2d014147c2. 
* Concurrent processing of files (#665) * Update README.md * Droped the old vector index (#652) * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * Added chatbot "hybrid " mode use case * added the concurrent file processing * page refresh scenario * fixed waiting files processing issue in refresh scenario * removed boolean param * fixed processedCount issue * checkbox with waiting check * fixed the refresh scenario with processing files * processing files check * server side error * processing file count check for processing files less than batch size * processing count check to handle allselected files * created helper functions * code improvements * __ changes (#656) * DiffbotGraphTransformer doesn't need an LLMGraphTransformer (#659) Co-authored-by: jeromechoo * Removed experiments/llm-chatbot-python folder from DEV branch * redcued the password clear timeout * Removed experiments/Cypher_Queries.ipynb file from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * update delete query with entities * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * add BATCH_SIZE to docker * temp fixes for 0.3.1 * alert fix for less than batch size processing * new virtual env * added Hybrid Chat modes (#670) * Rename the function #657 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * Graph node filename check * env fixes with latest nvl libraries * format fixes * removed local files * Remove TotalPages when save file on local (#684) * file_name reference and verify_ssl issue fixed (#683) * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without 
embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes 
* chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * added cypher_queries and llm chatbot files * updated llm-chatbot-python * added llm-chatbot-python * updated llm-chatbot-python folder * page refresh scenario * fixed waiting files processing issue in refresh scenario * Removed experiments/llm-chatbot-python folder from DEV branch * disabled the closed button on banner and connection dialog while API is in pending state * node id check (#663) * Status source and type filtering (#664) * status source * Name change * type change * rollback to previous working nvl version * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes 
* env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Status source and type filtering (#664) * status source * Name change * type change * added the alert * temp fixes for 0.3.1 * label and checkboxes placement changes (#675) * label and checkboxes placement changes * checkbox placement changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for 
create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * ndl changes * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env fixes with latest nvl libraries * format fixes * User flow changes for recreating supported vector index (#682) * removed the if check * Add one more check for create vector index when chunks are exist without embeddings * removed local files * condition changes * chunks exists check * chunk exists without embeddings check * vector Index issue fixed * vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * property spell fix --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * env changes * format fixes * set retry status * retry processing backend * added the retry icon on rows * vite 
changes in docker compose * added retry dialog * Integrated the Retry processing API * Integrated the Extract API fro retry processing * Integrated ndl toast component * replaced foreach with normal for loop for better performance * types improvements * used toast component * spell fix * Issue fixed * processing changes in main * function closing fix * retry processing issue fixed * autoclosing the retry popup on retry api success * removed the retry if check * resetting the node and relationship count on retry * added the enter key events on the popups * fixed wikipedia icon on large file alert popup * setting nodes to 0 and start from last processed chunk logic changes * Retry Popup fixes * status changes for upload failed scenario * kept condition specific * changed status to reprocess from retry * Reprocess wording changes * tooltip changes * wordings and size changes * Changed status to Reprocess * updated node count for start from begnning --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Jerome Choo Co-authored-by: jeromechoo * uncommented the Retry Processing * removed __Entity__ labels * spell fix * fixed postprocessing method invoking issue for odd no files * lint fix * Added filesource and name in chunks * Preload=True remove from HSTS * Graph communities (#748) * UI changes * modes enable disable * separated sources entities chunk communities * communities added into separate component * Update ChatInfoModal.tsx * added filename and source for chunksinfo * removed the console.log * mode disable changes --------- 
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * aria label addition * code improvements used URL class for host url check * host level check * Update Security header * encryption of localstorage values * 'mode-selection-changes' * added local chat history * added neo4j from existing index to entity vector mode * label changes * commented security header * Communities (#721) * added communities creation * added communities * removed tqdm * removed __Entity__ labels * removed graph_object * removed graph object in the function * Modified queries * added properties and modified to entity labels * Post processing call after all files completion (#716) * Dev To STAGING (#532) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#535) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> 
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Fix typo: correct 'josn_obj' to 'json_obj' (#697) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node 
properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * 
Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable.
* timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file 
re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the 
default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the 
content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * 
added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and 
gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the 
TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed 
validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status 
message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey 
<156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks 
create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and 
youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith 
+ fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input 
(#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata 
(#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users… * added global env for communities * comment all security header * added threading to chat summarization to improve chat response time (#751) * formatted the queries and added logic for empty label (#752) * Commented youtube google api code * added the error handling for passowrd decrypt error * wordings changes * Exclude default labels from get_labels_and_relationtypes * Post-Processing-Alerts (#758) * added the alerts before and after the post processing * Tooltip changes * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query 
--------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * Langchain libs update (#769) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * fixed the rerendering of the table while file status is processing * fix: Read Only User Fix * Global search fulltext (#767) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * payload changes * added nodetails properties * payload new changes * communities check * communities selecetion check * Communities bug solutions (#770) * added local chat history * added write access check * added write access param * labels cahnge for nodes * added fulltext creation * disabled the write and delete actions for read only user mode * modified query * test updates * test uupdated * enable communities * removed the selected prop * Read Only User Support (#766) * added local chat history * added write access check * added write access param * added fulltext creation * disabled the write and delete actions for read only user mode * modified query --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * storing the gds status and write access on refresh * enable communities label change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * readonly fixed on refresh * clear chat history * slectedFiles check for Chatbot * clear history --------- Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> * Added elapsed time for extarction on each breakdown function * lint and format fixes * removed dev logs * communities fix * disabled the generate graph for read only user * format fixes * graph labels change * added the readonly check for already added waiting files * Retriever evaluation using RAGAS * deleted unused file * code optimization using memo * Added elapsed_time on each api and getiing time per_entity * Added the post processing Alert showcasing the ongoing post processing jobs * fix: readonly user retry option disable * update script to get details of extarcted doc * Issue fixed, Latency count per entity * Multiple chat modes selection (#780) * added Multi modes selection * multimodes state mangement * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch statemangement * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code * Fix: ChatModes DeSelection on FIle Selection * Fix: Order of the chatmodes accordoing to selected chatmodes * Community optimization (#790) * modified leidens parameters * updated disconnected nodes query * excluded communities from dedup * added index creation * modified de dup query * added delete query for communities * Async way to create entities from multiple chunks (#788) * LLMs with latest langchain dev libraries * conflict resolved * all llm models with latest library changes * async way to get graph documents * indentation correction * fixed graph mode error (#792) * Raga's Evaluation Metrics (#787) * added Multi modes 
selection * ragas eval * added response * multimodes state management * fix: state handling of chat details of the default mode * Added the ChatModeSwitch Component * modes switch state management * added the chatmodes switch in both view * removed the copied text * Handled the error scenario * fix: speech issue between modes * ragas evaluation metric show * Output return type changed * fix: Handled activespeech speech and othermessage modes switch * used requestanimationframe instead of setTimeOut * removed the commented code * Added ragas to requirements * Integrated the metric api * ragas response updated, llm list updated * resolved syntax error in score * Added the Metrics Table * fix: Long text UI Issue * code optimization for evaluation * added the download button for downloading the info * key name change * Optimized the downloadClickHandler --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna * Openai gemini config (#794) * openai and gemini models as config backend * updated dropdown llm values * updated docs * Added the user action for metrics table * Graph enhancements (#795) * graph changes * graph properties changes * graph communities changes * graph type selection * checkbox check changes * format changes * Communities Bug fixes (#775) * added global search+vector+fulltext mode * added community details in chunk entities * added node ids * updated vector graph query * added entities and modified chat response * added params * api response changes * added chunk entity query * modifies query * labels change for nodes * payload changes * added nodetails properties * payload new changes * communities check * communities selection check * enable communities * removed the selected prop * enable communities label change * communities name change * cred check * tooltip * fix: Copy Icon Theme Fix --------- Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * llm name changes * build fix * default mode fix * ragas model names update * lint fixes * Chunk Entities API condition * added the tooltip for unsupported lllms for ragas metric loading * removed unused imports * multimode fix when we get error response * mode changes for score display * fix: Fixed the details state handling between multiple chats feature: Added the warning banner If selected llm model is not supported for raga's evaluation * Fix: Entity Mode Width Fix * diffbot fix for async (#797) * Minor changes (#798) * added congig variable for default diffbot chat model * fulltext index creation is skipped when the labels are empty * entity vector change * added optinal to communities for entity mode * updated the entity query --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * New: Added the supported llm models for ragas evaluation * Fix: Communitites Tab is displayed based communitites length * added the conversation download button (#800) * model name correction * chatmode switch mode fix * Add API payload GCP logging (#805) * Adding Links to get neighboring nodes (#796) * addition of link * added neighbours query * implemented with driver * updated the query * communitiesInfo name change * communities.tsx removed * api integration * modified response * entities change * chunk and communities * chunk space removal * added element id to chunks * loading on click * format changes * added file name for Dcoumrnt node * chat token cut off model name update * icon change * duplicate sources removal * Entity change --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * added error message for doc retriver (#807) * copy row (#803) * copy row * column for copy * column copy * Raga's Evaluation For Multi Modes (#806) * 
Updated models for ragas eval * context utilization metrics removed * updated supported llms for ragas * removed context utilization * Implemented Parallel API * multi api calls error resolved * MultiMode Metrics * Fix: Metric Evaluation For Single Mode * multi modes ragas evaluation * api payload changes * metric api output format changed * multi mode ragas changes * removed pre process dataset * api response changes * Multimode metrics api integration * nan error for no answer resolved * QA integration changes --------- Co-authored-by: kaustubh-darekar * lint fixes * fix: multimode metrics state handling fix: lint fixes * fix: Multimode metrics mode change state issue fix: chunk list style issue * fix: list style fix * Correct TYPO mistake * added new env for ragas embedding model * Props name changes (#811) * Props name changes * removed the accesstoken from row on copy action * props changes for dropzone component * graph view changes --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * test * view graph * nodes count and relationshipcount updation fix * sourceUrl Fix * empty string "" fix to keep the default values we should keep the value blank instead "" * prop changes * props changes * retry condition update for failed files (#820) * Chat modes name changes (#815) * Props name changes * removed the accesstoken from row on copy action * updated chat mode names * Chat Modes Name Changes * lint fixes * using readable format In UI * removal of size to avoid console warning * key add --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Youtube transcript fix with proxy (#822) * update script for async func * ragas changes for graph retrieval mode. 
context added in api output (#825) * Remove extract latency from logging and add LIMIT in duplicate nodes * Document updates (#828) * document updated with ragas evaluation information * formatting changes * chatbot api documentation updated * api details added in document * function name changed for drop create vector index api * Update README.md * updated api structure in docs (#827) * Update backend_docs.adoc * 821 llm model listing (#823) * added logic for document filters * LLM models * message change * link added * removed the text --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * Exclude session label node from duplicate nodes list * Added the tooltip for disabled llm option (#835) * node size changes * mode removal of rows check * formatting * Exclude __Entity__ node label from duplicate node list * Update README.md * Update README.md * Update README.md * fixed the youtube link * Security header and GZIPMiddleware (#847) * Added security header all API * Add GZipMiddleware * Chunk Text Details (#850) * Community title added * Added api for fetching chunk text details * output format changed for chunk text * integrated the service layer for chunkdata * added the chunks * formatting output of llm call for title generation * formatting llm output for title generation * added flex row * Changes related to pagination of fetch chunk api * Integrated the pagination * page changes error resolved for fetch chunk api * for get neighbours api , community title added in properties * moving community title related changes to separate branch * Removed Query module from fastapi import statement * icon changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Communities Id to Title (#851) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat 
mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling 
(#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the 
selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file 
source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, 
was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist 
error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * 
Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog 
* Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Me… * disconnected nodes (#852) * loading changes * Update score.py * added middleware * removed the unused state * Youtube timestamp (#877) * youtube timestamp added to metadata * updated timestamps format while extraction * added fix for last chunk * updated default values of timestamp --------- Co-authored-by: kaustubh-darekar * Handled Nonetype error during global search. (#876) * Additional metrics using ground truth (#855) * Updating ragas metrics * added the service for additional metrics * additional metrics api * Adding Rouge to requirement * changes done for additional metrics for gemini model * Additional metrics changes related to gemini model * Adding Rouge_Score Version * Api Integration * payload changes * payload fix * Fixing Eval Error * Adding fact_score metric * code refactoring * table integration * data binding * Integrated additional metrics on multimodes * removed fact score * Removing Fact Score * fix: Multimode fix * custommiddleware for gzip * removed unused state * message changes * uncommented gzipmiddleware * code refactoring * removed settings modal code * Table UI Fixes * removed state * UX improvements for chunks popup * added the status check * ndl version changes * tip and dropdown changes * icon fixes * contextmenu fix * Box CSS fix * icon fixes * icon changes * IsRoot fix * added the tooltip for metrics * Menu fix inside modal * hover color fix * menu changes * format and lint fixes --------- Co-authored-by: a-s-poorna Co-authored-by: kaustubh-darekar * Url changes and state management (#870) * Url changes and state management * comment removal * state management * added login * handled delete * connection status check * open login modal and retain chat * merge fixes * retaining messages * removed unused prop * chat mode width fix * Table selection Fix * Table issue 
(#885) * table changes * removed interdeterminant checkbox * removed material ui checkbox --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * Logging properties update, remove payload json * fix: readme typos (#887) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat 
history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file 
selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs 
is retrieved (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the tooltip for dropzone sources --------- Co-authored-by: 
kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf 
upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions 
--------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * 
added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed 
env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn 
format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrollbar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: 
Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Upload file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena * youtube url fix * Commented CSP middleware and added endpoint backend_connection_configuation * added csp header * removed the useEffect * Table issue (#888) * table changes * removed interdeterminant checkbox * removed material ui checkbox * labels changes * node labels * messages fix * aria-label added --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * key fix * Update README.md * Update README.md * Update README.md * removed extra document nodes and combine chunk logic (#894) * Update README.md * Update README.md * conditional deployment based on the enviornment * Update README.md * Update README.md * removed the reference answer checkbox and textarea while additional metrics are loading * LLM_MODELS * re process feature state renaming (#898) * Status Change From Reprocess to Ready To Reprocess * Added the description * text changes * Community Counts after post processing (#890) * Community count updated in post processing api * Community count query changed * API integration for communities post counts * node and relationships count * filename check * show communities in popover only if its GDS * Code segregation --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> * format and checked fixes (#897) * added info to show 50 chunks processing (#899) * format and lint fixes * Env changes (#896) * env changes with state management * format and lint fixes * Update frontend_docs.adoc * Update backend_docs.adoc * state changes * error handling * button handling * Update Content.tsx * Update Content.tsx * build fix * communitifiles array check * combining one chunk (#901) * combining one chunk * updated llm.py * updated 
llm.py * Delete query refined to delete all related nodes of file (#904) * readonly change * Prod v6 fix (#909) * Staging to main (#735) * Dev (#537) * format fixes and graph schema indication fix * Update README.md * added chat modes variable in env updated the readme * spell fix * added the chat mode in env table * added the logos * fixed the overflow issues * removed the extra fix * Fixed specific scenario "when the text from schema closes it should reopen the previous modal" * readme changes * removed dev console logs * added new retrieval query (#533) * format fixes and tab rendering fix * fixed the setting modal reopen issue --------- Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions 
(#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file 
selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README 
doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added 
the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar 
<121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before 
extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon 
accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed 
knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * 
tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for 
unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket upload * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructured files to gcs (#455) * Modified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> * disabled the sumbit buttom on loading * Deduplication tab (#566) * de-duplication API * Update De-Duplicate query * created the Deduplication tab * added the API service * added the removeable tags for similar nodes in deduplication tab * Integrate Tag * added GraphLabel * added loader state * added the merge service * integrated the merge API * Merge Query issue fixed * Auto refresh the duplicate nodes after merging operation * added the description for de duplication * reset on merging --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Update frontend_docs.adoc (#538) * Update frontend_docs.adoc * doc update * Images * Images folder change * Images folder change * test image * Update frontend_docs.adoc * image change * Update frontend_docs.adoc * Update frontend_docs.adoc * added the Graph Mode SS * added the Query SS * Update frontend_docs.adoc * conflics fix * conflict fix * Update frontend_docs.adoc --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * updated langchain versions (#565) * Update the De-Duplication query * Node relationship id type none issue (#547) * de-duplication API * Update De-Duplicate query * Issue fixed Nodes,Relationship Id and Type None or Blank * added the tooltips * type fix * Unneccory import * added score threshold and added some error handling (#571) * Update requirements.txt * Tooltip and other UI fixes (#572) * Staging To Main (#495) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * 
Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * 
Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> 
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * Dev (#433) * Integration_qa test (#375) * Test IntegrationQA added * update test 
cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn 
workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons --------- Co-authored-by: abhishekkumar-27 
<164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * DEV to STAGING (#461) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki language param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent 
<101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration 
backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm 
by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips 
(#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * 
rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. 
- Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * DEV to STAGING (#462) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refactor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of disk space * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default 
page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of 
link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if 
used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. 
fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya 
<156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed 
from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal 
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> * added upload api * changed the dropzone error message * Dev to staging (#466) * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * recent merges * pdf deletion due to out of diskspace * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * Convert is_cancelled value from string to bool * added the default page size * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * offset in chunks (#389) * page number in gcs loader (#393) * added youtube timestamps (#392) * chat pop up button (#387) * expand * minimize-icon * css changes * chat history * chatbot wider Side Nav * expand icon * chatbot UI * Delete * merge fixes * code suggestions --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * chunks create before extraction using is_pre_process variable (#383) * chunks create before extraction using is_pre_process variable * Return total pages for Model * update requirement.txt * total 
pages on uplaod API * added the Confirmation Dialog * added the selected files into the confirmation modal * format and lint fixes * added the stop watch image * fileselection on alert dialog * Add timeout in docker for gunicorn workers * Add cancel icon to info popup (#384) * Info Modal Changes * css changes * recent merges * Integration_qa test (#375) * Test IntegrationQA added * update test cases * update test * update node count assertions * test changes * update changes * modification test * Code refatctor test cases * Handle allowedlist issue in test * test changes * update test * test case execution * test chatbot updates * test case update file * added file --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * fixed status blank issue * Rendering the file name instead of link for gcs and s3 sources in the info modal * added the default page size * Convert is_cancelled value from string to bool * Issue fixed Processed chunked as 0 when file re-process again * Youtube timestamps (#386) * Wikipedia source to accept all valid urls * wikipedia url to support multiple languages * integrated wiki langauge param for extract api * Youtube video timestamps --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * groq llm integration backend (#286) * groq llm integration backend * groq and description in node properties * added groq in options --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Save Total Pages in DB * Added total Pages * file selection when we didn't select anything from Main table * added the danger icon only for large files * added the overflow for more files and file selection for all new files * moved the interface to types * added the icon accoroding to the source * set total page for wiki and youtube * h3 heading * merge * updated the alert on basis if total pages * deleted chunks * polling based on total pages * 
isNan check * large file based on file size for s3 and gcs * file source in server side event * time calculation based on chunks for gcs and s3 --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * fixed the layout issue * Populate graph schema (#399) * crreate new endpoint populate_graph_schema and update the query for getting lables from DB * Added main.py changes * conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396) * added the condtion * removed llms * Fixed issue : Remove extra unused param * get emb only if used (#278) * Chatbot chunks (#402) * Added file name to the content sent to LLM * added chunk text in the response * increased the docs parts sent to llm * Modified graph query * mardown rendering * youtube starttime * icons * offset changes * removed the files due to codespace space issue --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user (#405) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * fixed css issue * fixed status blank issue * Modified response when no docs is retrived (#413) * Fixed env/docker-compose for local deployments + README doc (#410) * Fixed env/docker-compose for local deployments + README doc * wrong place for ENV in README * by default, removed langsmith + fixed knn score string to float * by default, removed langsmith + fixed knn score string to float * Fixed strings in docker-compose env * Added requirements (neo4j 5.15 or later, APOC, 
and instructions for Neo4j Desktop) * Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that * Support for all unstructured files (#401) * all unstructured files * responsiveness * added file type * added the extensions * spell mistake * ppt file changes --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415) * added the json * added schema from text dialog * integrated the schemaAPI * added the alert * resize fixes * Extract schema using direct ChatOpenAI API and Chain * integrated the checkbox for schema to text dialog * Update SettingModal.tsx --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * gcs file content read via storage client (#417) * gcs file content read via storage client * added the access token the file state --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * pypdf2 to read files from gcs (#420) * 407 remove driver from frontend (#416) * removed driver * removed API * connecting to database on page refresh --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Css handling of info modal and Tooltips (#418) * css change * toolTips * Sidebar Tooltips * copy to clip * css change * added image types * added gcs * type fix * docker changes * speech * added the toolip for dropzone sources --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed retrival bugs (#421) * yarn format fixes * changed the delete message * added the cancel button * changed the message on tooltip * added space * UI fixes * tooltip for setting * updated req * wikipedia URL input (#424) * accept only wikipedia links * added wikipedia link * added wikilink regex * wikipedia single url only * changed 
the alert message * wording change * pushed validation state persist error --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * speech and copy (#422) * speech and copy * startTime * added chunk properties * tooltips --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * Fixed issue for out of range in KNN API * solved conflicts * conflict solved * Remove logging info from update KNN API * tooltip changes * format and lint fixes * responsiveness changes * Fixed issue for total pages GCS, S3 * UI polishing (#428) * button and tooltip changes * checking validation on change * settings module populate fix * format fixes * opening the modal after auth success * removed the limit * added the scrobar for dropdowns * speech state (#426) * speech state * Button Details changes * delete wording change * Total pages in buckets (#431) * page number NA for buckets * added N/A for gcs and s3 pages * total pages for gcs * remove unwanted logger --------- Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> * removed the max width * Update FileTable.tsx * Update the docker file * Modified prompt (#438) * Update Dockerfile * Update Dockerfile * Update Dockerfile * rendering Fix * Local file upload gcs (#442) * Uplaod file to GCS * GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled * Add life cycle rule on uploaded bucket * pdf upload local and gcs bucket check * delete files when processed and extract changes --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> * Modified chat length and entities used (#443) * metadata for unstructured files (#446) * Unstructured file metadata (#447) * metadata for unstructured files * sleep in gcs upload * updated * icons added to chunks (#435) * icons added to 
chunks * info modal icons * fixed gcs status message issue * added if check for failed count * Null issue Fixed from backend for upload API and graph_document when model name mismatch * added word break issue * Added neo4j-rust-ext * processing time estimation based on bytes * File extension upper case fixed, File delete from GCS or local based on env variable. * timer per byte * Update Dockerfile * Adding sort rows on the table (#451) * Gcs upload folder hashed (#453) * implement foldername hashed in GCS bucket uplaod * Raise exception if invalid model selected * folder name for gcs upload --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * upload all unstructuredfiles to gcs (#455) * Mofified chunk query (#454) * Added libre office for fixing error -- soffice command was not found. Please install libreoffice on your system and try again. - Install instructions: https://www.libreoffice.org/get-help/install-howto/ - Mac: https://formulae.brew.sh/cask/libreoffice - Debian: https://wiki.debian.org/LibreOffice" * Fix the PARTIAL CONTENT issue * File-table no data found (#456) * 'file-table'' * review comment * Llm format change (#459) * changed the llm models format to lowercase * added the error message * llm model changes * format fixes * removed unused import * added the capitalize method * delete files from merged_file_path only if source is local file --------- Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> * commented total page code (#460) * format fixes * removed the disabled check on dropdown * Large file env * added upload api * changed the dropzone error message --------- Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: vasanthasaikalluri 
<165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Co-authored-by: Ajay Meena * format and lint fixes --------- Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com> Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com> Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com> Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com> Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com> Co-authored-by: Jayanth T Co-authored-by: Jerome Choo Co-authored-by: jeromechoo Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com> Co-authored-by: Ajay Meena Co-authored-by: Morgan Senechal Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com> Co-authored-by: Ikko Eltociear Ashimine Co-authored-by: Pravesh1988 Co-authored-by: edenbuaa Co-authored-by: kaustubh-darekar Co-authored-by: a-s-poorna Co-authored-by: Jayanth T Co-authored-by: Michael Hunger Co-authored-by: Kain Shu <44948284+Kain-90@users.noreply.github.com> Co-authored-by: Marcos Cannabrava <54267712+marcoscannabrava@users.noreply.github.com> --- README.md | 34 +- backend/example.env | 1 + backend/requirements.txt | 208 +- backend/score.py | 289 +- backend/src/QA_integration.py | 17 +- backend/src/chunkid_entities.py | 4 +- backend/src/create_chunks.py | 4 +- backend/src/diffbot_transformer.py | 3 +- backend/src/document_sources/gcs_bucket.py | 22 +- backend/src/document_sources/local_file.py | 2 - backend/src/document_sources/youtube.py | 54 +- backend/src/graphDB_dataAccess.py | 130 +- backend/src/graph_query.py | 2 - backend/src/llm.py | 31 +- backend/src/main.py | 66 +- backend/src/make_relationships.py | 2 +- backend/src/post_processing.py | 2 +- backend/src/ragas_eval.py | 45 + backend/src/shared/common_fn.py | 20 +- 
backend/src/shared/constants.py | 96 +- backend/src/shared/schema_extraction.py | 1 - docker-compose.yml | 5 +- docs/backend/backend_docs.adoc | 106 +- docs/frontend/frontend_docs.adoc | 74 +- frontend/Dockerfile | 7 +- frontend/README.md | 4 +- frontend/example.env | 2 + .../nginx/{nginx.conf => nginx.local.conf} | 30 +- frontend/nginx/nginx.prod.conf | 22 + frontend/package.json | 9 +- frontend/src/App.css | 15 +- frontend/src/HOC/CustomModal.tsx | 7 +- frontend/src/HOC/withVisibility.tsx | 14 + frontend/src/assets/images/chunks.svg | 221 ++ frontend/src/components/BreakDownPopOver.tsx | 33 + .../src/components/ChatBot/ChatInfoModal.tsx | 293 +- .../src/components/ChatBot/ChatModeToggle.tsx | 46 +- .../components/ChatBot/ChatModesSwitch.tsx | 20 +- .../components/ChatBot/ChatOnlyComponent.tsx | 158 + frontend/src/components/ChatBot/Chatbot.tsx | 73 +- frontend/src/components/ChatBot/ChunkInfo.tsx | 45 +- .../components/ChatBot/CommunitiesInfo.tsx | 16 +- .../src/components/ChatBot/EntitiesInfo.tsx | 18 +- .../ChatBot/ExpandedChatButtonContainer.tsx | 50 +- .../components/ChatBot/MetricsCheckbox.tsx | 21 + .../src/components/ChatBot/MetricsTab.tsx | 55 +- .../components/ChatBot/MultiModeMetrics.tsx | 230 +- .../components/ChatBot/NotAvailableMetric.tsx | 20 + .../src/components/ChatBot/SourcesInfo.tsx | 12 +- frontend/src/components/Content.tsx | 339 +-- .../components/DataSources/AWS/S3Modal.tsx | 62 +- .../components/DataSources/GCS/GCSModal.tsx | 70 +- .../components/DataSources/Local/DropZone.tsx | 16 +- .../Local/DropZoneForSmallLayouts.tsx | 31 +- frontend/src/components/Dropdown.tsx | 24 +- frontend/src/components/FileTable.tsx | 248 +- .../components/Graph/CheckboxSelection.tsx | 12 +- .../components/Graph/GraphPropertiesPanel.tsx | 15 +- .../components/Graph/GraphPropertiesTable.tsx | 4 +- .../src/components/Graph/GraphViewModal.tsx | 55 +- .../src/components/Graph/ResultOverview.tsx | 22 +- .../src/components/Layout/DrawerChatbot.tsx | 11 +- 
.../src/components/Layout/DrawerDropzone.tsx | 26 +- frontend/src/components/Layout/Header.tsx | 315 +- frontend/src/components/Layout/PageLayout.tsx | 233 +- frontend/src/components/Layout/SideNav.tsx | 161 +- .../components/Popups/ChunkPopUp/index.tsx | 40 +- .../ConnectionModal/ConnectionModal.tsx | 106 +- .../VectorIndexMisMatchAlert.tsx | 4 +- .../Popups/DeletePopUp/DeletePopUp.tsx | 8 +- .../Deduplication/index.tsx | 43 +- .../DeleteTabForOrphanNodes/index.tsx | 40 +- .../EntityExtractionSetting.tsx | 38 +- .../SelectedJobList.tsx | 6 +- .../PostProcessingCheckList/index.tsx | 6 +- .../Popups/GraphEnhancementDialog/index.tsx | 61 +- .../LargeFilePopUp/ConfirmationDialog.tsx | 6 +- .../Popups/LargeFilePopUp/LargeFilesAlert.tsx | 22 +- .../Popups/RetryConfirmation/Index.tsx | 30 +- .../Popups/Settings/SchemaFromText.tsx | 31 +- .../Popups/Settings/SettingModal.tsx | 285 -- frontend/src/components/QuickStarter.tsx | 22 +- .../src/components/UI/ButtonWithToolTip.tsx | 30 +- frontend/src/components/UI/CustomCheckBox.tsx | 2 +- frontend/src/components/UI/CustomMenu.tsx | 36 + frontend/src/components/UI/CustomPopOver.tsx | 15 + frontend/src/components/UI/ErrroBoundary.tsx | 4 +- frontend/src/components/UI/FallBackDialog.tsx | 2 +- frontend/src/components/UI/HoverableLink.tsx | 3 +- .../src/components/UI/IconButtonToolTip.tsx | 37 +- frontend/src/components/UI/Menu.tsx | 50 - frontend/src/components/UI/TipWrapper.tsx | 14 +- .../WebSources/CustomSourceInput.tsx | 39 +- .../WebSources/GenericSourceModal.tsx | 36 +- frontend/src/context/UserCredentials.tsx | 19 +- frontend/src/context/UsersFiles.tsx | 7 +- frontend/src/hooks/useSourceInput.tsx | 6 + frontend/src/hooks/useSse.tsx | 18 + frontend/src/router.tsx | 4 +- frontend/src/services/AdditionalMetrics.ts | 31 + frontend/src/services/ConnectAPI.ts | 14 +- frontend/src/services/GetFiles.ts | 4 +- frontend/src/types.ts | 59 +- frontend/src/utils/Constants.ts | 63 +- frontend/src/utils/Utils.ts | 6 +- 
frontend/yarn.lock | 2543 +++++++++-------- 106 files changed, 4808 insertions(+), 3265 deletions(-) rename frontend/nginx/{nginx.conf => nginx.local.conf} (94%) create mode 100644 frontend/nginx/nginx.prod.conf create mode 100644 frontend/src/HOC/withVisibility.tsx create mode 100644 frontend/src/assets/images/chunks.svg create mode 100644 frontend/src/components/BreakDownPopOver.tsx create mode 100644 frontend/src/components/ChatBot/ChatOnlyComponent.tsx create mode 100644 frontend/src/components/ChatBot/MetricsCheckbox.tsx create mode 100644 frontend/src/components/ChatBot/NotAvailableMetric.tsx delete mode 100644 frontend/src/components/Popups/Settings/SettingModal.tsx create mode 100644 frontend/src/components/UI/CustomMenu.tsx create mode 100644 frontend/src/components/UI/CustomPopOver.tsx delete mode 100644 frontend/src/components/UI/Menu.tsx create mode 100644 frontend/src/services/AdditionalMetrics.ts diff --git a/README.md b/README.md index fadf6eee9..e37d3ddfe 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Upload your files from local machine, GCS or S3 bucket or from web sources, choo - **Knowledge Graph Creation**: Transform unstructured data into structured knowledge graphs using LLMs. - **Providing Schema**: Provide your own custom schema or use existing schema in settings to generate graph. - **View Graph**: View graph for a particular source or multiple sources at a time in Bloom. -- **Chat with Data**: Interact with your data in a Neo4j database through conversational queries, also retrive metadata about the source of response to your queries. +- **Chat with Data**: Interact with your data in a Neo4j database through conversational queries, also retrieve metadata about the source of response to your queries. 
## Getting started @@ -31,7 +31,7 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b ### Local deployment #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. -Accoroding to enviornment we are configuring the models which is indicated by VITE_LLM_MODELS_PROD variable we can configure model based on our need. +The available models are configured per environment through the VITE_LLM_MODELS_PROD variable, which you can set to the models you need. EX: ```env VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" @@ -155,12 +155,23 @@ Allow unauthenticated request : Yes | VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | | VITE_LLM_MODELS_PROD | Optional | openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash | To Distinguish models based on the Enviornment PROD or DEV +| VITE_LLM_MODELS | Optional | 'diffbot,openai_gpt_3.5,openai_gpt_4o,openai_gpt_4o_mini,gemini_1.5_pro,gemini_1.5_flash,azure_ai_gpt_35,azure_ai_gpt_4o,ollama_llama3,groq_llama3_70b,anthropic_claude_3_5_sonnet' | Supported Models For the application | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | | RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | - +## LLMs Supported +1. OpenAI +2. Gemini +3. Azure OpenAI(dev) +4. Anthropic(dev) +5. Fireworks(dev) +6. Groq(dev) +7. Amazon Bedrock(dev) +8. Ollama(dev) +9. Diffbot +10. 
Other OpenAI compatible baseurl models(dev) ## For local llms (Ollama) 1. Pull the docker imgage of ollama @@ -175,7 +186,7 @@ docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama ```bash docker exec -it ollama ollama run llama3 ``` -4. Configure env variable in docker compose or backend enviournment. +4. Configure env variable in docker compose or backend environment. ```env LLM_MODEL_CONFIG_ollama_ #example @@ -191,13 +202,14 @@ VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} ## Usage -1. Connect to Neo4j Aura Instance by passing URI and password or using Neo4j credentials file. -2. Choose your source from a list of Unstructured sources to create graph. -3. Change the LLM (if required) from drop down, which will be used to generate graph. -4. Optionally, define schema(nodes and relationship labels) in entity graph extraction settings. -5. Either select multiple files to 'Generate Graph' or all the files in 'New' status will be processed for graph creation. -6. Have a look at the graph for individial files using 'View' in grid or select one or more files and 'Preview Graph' -7. Ask questions related to the processed/completed sources to chat-bot, Also get detailed information about your answers generated by LLM. +1. Connect to Neo4j Aura Instance which can be either AURA DS or AURA DB by passing URI and password or using Neo4j credentials file. +2. To differentiate them, we have added different icons: a database icon for AURA DB and a scientific molecule icon for AURA DS, shown right under the Neo4j Connection details label. +3. Choose your source from a list of Unstructured sources to create graph. +4. Change the LLM (if required) from drop down, which will be used to generate graph. +5. Optionally, define schema(nodes and relationship labels) in entity graph extraction settings. +6. Either select multiple files to 'Generate Graph' or all the files in 'New' status will be processed for graph creation. +7. 
Have a look at the graph for individual files using 'View' in grid or select one or more files and 'Preview Graph' +8. Ask questions related to the processed/completed sources to chat-bot, Also get detailed information about your answers generated by LLM. ## Links diff --git a/backend/example.env b/backend/example.env index 7fa3cb480..7cf9b13ac 100644 --- a/backend/example.env +++ b/backend/example.env @@ -31,6 +31,7 @@ DUPLICATE_TEXT_DISTANCE = "" #examples LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" +LLM_MODEL_CONFIG_openai_gpt_4o="gpt-4o-2024-11-20,openai_api_key" LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" diff --git a/backend/requirements.txt b/backend/requirements.txt index 8fc0e0bda..de1fc1136 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,183 +1,57 @@ -aiohttp==3.9.3 -aiosignal==1.3.1 -annotated-types==0.6.0 -antlr4-python3-runtime==4.9.3 -anyio==4.3.0 -async-timeout==4.0.3 asyncio==3.4.3 -attrs==23.2.0 -backoff==2.2.1 -beautifulsoup4==4.12.3 -boto3==1.34.140 -botocore==1.34.140 -cachetools==5.3.3 -certifi==2024.2.2 -cffi==1.16.0 -chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 -coloredlogs==15.0.1 -contourpy==1.2.0 -cryptography==42.0.2 -cycler==0.12.1 -dataclasses-json==0.6.4 -dataclasses-json-speakeasy==0.5.11 -Deprecated==1.2.14 -distro==1.9.0 -docstring_parser==0.16 -effdet==0.4.1 -emoji==2.10.1 -exceptiongroup==1.2.0 -fastapi==0.111.0 +boto3==1.35.69 +botocore==1.35.69 +certifi==2024.8.30 +fastapi==0.115.5 fastapi-health==0.4.0 -filelock==3.13.1 -filetype==1.2.0 -flatbuffers==23.5.26 -fonttools==4.49.0 -frozenlist==1.4.1 -fsspec==2024.2.0 -google-api-core==2.18.0 -google-auth==2.29.0 -google_auth_oauthlib==1.2.0 -google-cloud-aiplatform==1.58.0 -google-cloud-bigquery==3.19.0 +google-api-core==2.23.0 
+google-auth==2.36.0 +google_auth_oauthlib==1.2.1 google-cloud-core==2.4.1 -google-cloud-resource-manager==1.12.3 -google-cloud-storage==2.17.0 -google-crc32c==1.5.0 -google-resumable-media==2.7.0 -googleapis-common-protos==1.63.0 -greenlet==3.0.3 -grpc-google-iam-v1==0.13.0 -grpcio==1.62.1 -google-ai-generativelanguage==0.6.6 -grpcio-status==1.62.1 -h11==0.14.0 -httpcore==1.0.4 -httpx==0.27.0 -huggingface-hub -humanfriendly==10.0 -idna==3.6 -importlib-resources==6.1.1 +json-repair==0.30.2 pip-install==1.3.5 -iopath==0.1.10 -Jinja2==3.1.3 -jmespath==1.0.1 -joblib==1.3.2 -jsonpatch==1.33 -jsonpath-python==1.0.6 -jsonpointer==2.4 -json-repair==0.25.2 -kiwisolver==1.4.5 -langchain==0.3.0 -langchain-aws==0.2.1 -langchain-anthropic==0.2.1 -langchain-fireworks==0.2.0 -langchain-google-genai==2.0.0 -langchain-community==0.3.0 -langchain-core==0.3.5 -langchain-experimental==0.3.1 -langchain-google-vertexai==2.0.1 -langchain-groq==0.2.0 -langchain-openai==0.2.0 -langchain-text-splitters==0.3.0 +langchain==0.3.8 +langchain-aws==0.2.7 +langchain-anthropic==0.3.0 +langchain-fireworks==0.2.5 +langchain-community==0.3.8 +langchain-core==0.3.21 +langchain-experimental==0.3.3 +langchain-google-vertexai==2.0.7 +langchain-groq==0.2.1 +langchain-openai==0.2.9 +langchain-text-splitters==0.3.2 +langchain-huggingface==0.1.2 langdetect==1.0.9 -langsmith==0.1.128 -layoutparser==0.3.4 +langsmith==0.1.146 langserve==0.3.0 -#langchain-cli==0.0.25 -lxml==5.1.0 -MarkupSafe==2.1.5 -marshmallow==3.20.2 -matplotlib==3.7.2 -mpmath==1.3.0 -multidict==6.0.5 -mypy-extensions==1.0.0 neo4j-rust-ext -networkx==3.2.1 -nltk==3.8.1 -numpy==1.26.4 -omegaconf==2.3.0 -onnx==1.16.1 -onnxruntime==1.18.1 -openai==1.47.1 -opencv-python==4.8.0.76 -orjson==3.9.15 -packaging==23.2 -pandas==2.2.0 -pdf2image==1.17.0 -pdfminer.six==20221105 -pdfplumber==0.10.4 -pikepdf==8.11.0 -pillow==10.2.0 -pillow_heif==0.15.0 -portalocker==2.8.2 -proto-plus==1.23.0 -protobuf==4.23.4 -psutil==6.0.0 -pyasn1==0.6.0 
-pyasn1_modules==0.4.0 -pycocotools==2.0.7 -pycparser==2.21 -pydantic==2.8.2 -pydantic_core==2.20.1 -pyparsing==3.0.9 -pypdf==4.0.1 -PyPDF2==3.0.1 -pypdfium2==4.27.0 -pytesseract==0.3.10 -python-dateutil==2.8.2 +nltk==3.9.1 +openai==1.55.1 +opencv-python==4.10.0.84 +psutil==6.1.0 +pydantic==2.9.0 python-dotenv==1.0.1 -python-iso639==2024.2.7 -python-magic==0.4.27 -python-multipart==0.0.9 -pytube==15.0.0 -pytz==2024.1 -PyYAML==6.0.1 -rapidfuzz==3.6.1 -regex==2023.12.25 -requests==2.32.3 -rsa==4.9 -s3transfer==0.10.1 -safetensors==0.4.1 -shapely==2.0.3 -six==1.16.0 -sniffio==1.3.1 -soupsieve==2.5 -starlette==0.37.2 -sse-starlette==2.1.2 +PyPDF2==3.0.1 +PyMuPDF==1.24.14 +starlette==0.41.3 +sse-starlette==2.1.3 starlette-session==0.4.3 -sympy==1.12 -tabulate==0.9.0 -tenacity==8.2.3 -tiktoken==0.7.0 -timm==0.9.12 -tokenizers==0.19 -tqdm==4.66.2 -transformers==4.42.3 -types-protobuf -types-requests -typing-inspect==0.9.0 -typing_extensions==4.12.2 -tzdata==2024.1 -unstructured==0.14.9 -unstructured-client==0.23.8 -unstructured-inference==0.7.36 -unstructured.pytesseract==0.3.12 -unstructured[all-docs]==0.14.9 +tqdm==4.67.1 +unstructured[all-docs]==0.16.6 urllib3==2.2.2 -uvicorn==0.30.1 -gunicorn==22.0.0 +uvicorn==0.32.1 +gunicorn==23.0.0 wikipedia==1.4.0 wrapt==1.16.0 yarl==1.9.4 -youtube-transcript-api==0.6.2 +youtube-transcript-api==0.6.3 zipp==3.17.0 -sentence-transformers==3.0.1 -google-cloud-logging==3.10.0 -PyMuPDF==1.24.5 +sentence-transformers==3.3.1 +google-cloud-logging==3.11.3 pypandoc==1.13 -graphdatascience==1.10 +graphdatascience==1.12 Secweb==1.11.0 -ragas==0.1.14 - +ragas==0.2.6 +rouge_score==0.1.2 +langchain-neo4j==0.1.1 \ No newline at end of file diff --git a/backend/score.py b/backend/score.py index 03fb75de9..cacbcc791 100644 --- a/backend/score.py +++ b/backend/score.py @@ -3,7 +3,6 @@ from fastapi.middleware.cors import CORSMiddleware from src.main import * from src.QA_integration import * -from src.entities.user_credential import user_credential 
from src.shared.common_fn import * import uvicorn import asyncio @@ -37,6 +36,7 @@ from src.ragas_eval import * from starlette.types import ASGIApp, Message, Receive, Scope, Send import gzip +from langchain_neo4j import Neo4jGraph logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -82,10 +82,9 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): await gzip_middleware(scope, receive, send) app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) app.add_middleware(XContentTypeOptions) app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) -#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) app.add_middleware( CORSMiddleware, @@ -124,10 +123,6 @@ async def create_source_knowledge_graph_url( try: start = time.time() - payload_json_obj = {'api_name':'url_scan', 'db_url':uri, 'userName':userName, 'database':database, 'source_url':source_url, 'aws_access_key_id':aws_access_key_id, - 'model':model, 'gcs_bucket_name':gcs_bucket_name, 'gcs_bucket_folder':gcs_bucket_folder, 'source_type':source_type, - 'gcs_project_id':gcs_project_id, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") if source_url is not None: source = 
source_url else: @@ -155,7 +150,9 @@ async def create_source_knowledge_graph_url( message = f"Source Node created successfully for source type: {source_type} and source: {source}" end = time.time() elapsed_time = end - start - json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}','userName':userName, 'database':database, 'aws_access_key_id':aws_access_key_id, + 'model':model, 'gcs_bucket_name':gcs_bucket_name, 'gcs_bucket_folder':gcs_bucket_folder, 'source_type':source_type, + 'gcs_project_id':gcs_project_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") result ={'elapsed_api_time' : f'{elapsed_time:.2f}'} return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name,data=result) @@ -209,14 +206,8 @@ async def extract_knowledge_graph_from_file( """ try: start_time = time.time() - payload_json_obj = {'api_name':'extract', 'db_url':uri, 'userName':userName, 'database':database, 'source_url':source_url, 'aws_access_key_id':aws_access_key_id, - 'model':model, 'gcs_bucket_name':gcs_bucket_name, 'gcs_bucket_folder':gcs_bucket_folder, 'source_type':source_type,'gcs_blob_filename':gcs_blob_filename, - 'file_name':file_name, 'gcs_project_id':gcs_project_id, 'wiki_query':wiki_query,'allowedNodes':allowedNodes,'allowedRelationship':allowedRelationship, - 'language':language ,'retry_condition':retry_condition,'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = 
create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) - if source_type == 'local file': merged_file_path = os.path.join(MERGED_DIR,file_name) logging.info(f'File path:{merged_file_path}') @@ -240,6 +231,22 @@ async def extract_knowledge_graph_from_file( return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time if result is not None: + logging.info("Going for counting nodes and relationships in extract") + count_node_time = time.time() + graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + count_response = graphDb_data_Access.update_node_relationship_count(file_name) + logging.info("Nodes and Relationship Counts updated") + if count_response : + result['chunkNodeCount'] = count_response[file_name].get('chunkNodeCount',"0") + result['chunkRelCount'] = count_response[file_name].get('chunkRelCount',"0") + result['entityNodeCount']= count_response[file_name].get('entityNodeCount',"0") + result['entityEntityRelCount']= count_response[file_name].get('entityEntityRelCount',"0") + result['communityNodeCount']= count_response[file_name].get('communityNodeCount',"0") + result['communityRelCount']= count_response[file_name].get('communityRelCount',"0") + result['nodeCount'] = count_response[file_name].get('nodeCount',"0") + result['relationshipCount'] = count_response[file_name].get('relationshipCount',"0") + logging.info(f"counting completed in {(time.time()-count_node_time):.2f}") result['db_url'] = uri result['api_name'] = 'extract' result['source_url'] = source_url @@ -247,6 +254,17 @@ async def extract_knowledge_graph_from_file( result['source_type'] = source_type result['logging_time'] = formatted_time(datetime.now(timezone.utc)) result['elapsed_api_time'] = f'{extract_api_time:.2f}' + result['userName'] = userName + result['database'] = database + 
result['aws_access_key_id'] = aws_access_key_id + result['gcs_bucket_name'] = gcs_bucket_name + result['gcs_bucket_folder'] = gcs_bucket_folder + result['gcs_blob_filename'] = gcs_blob_filename + result['gcs_project_id'] = gcs_project_id + result['allowedNodes'] = allowedNodes + result['allowedRelationship'] = allowedRelationship + result['language'] = language + result['retry_condition'] = retry_condition logger.log_struct(result, "INFO") result.update(uri_latency) logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") @@ -279,15 +297,13 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None """ try: start = time.time() - payload_json_obj = {'api_name':'sources_list', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") decoded_password = decode_password(password) if " " in uri: uri = uri.replace(" ","+") result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'sources_list','db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response("Success",data=result, message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: @@ -300,33 +316,42 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None @app.post("/post_processing") async def post_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), tasks=Form(None)): try: - payload_json_obj = {'api_name':'post_processing', 'db_url':uri, 'userName':userName, 
'database':database, 'tasks':tasks, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) tasks = set(map(str.strip, json.loads(tasks))) - + count_response = [] + start = time.time() if "materialize_text_chunk_similarities" in tasks: await asyncio.to_thread(update_graph, graph) - json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + api_name = 'post_processing/update_similarity_graph' logging.info(f'Updated KNN Graph') if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks: await asyncio.to_thread(create_vector_fulltext_indexes, uri=uri, username=userName, password=password, database=database) - json_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + api_name = 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom' logging.info(f'Full Text index created') if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks: await asyncio.to_thread(create_entity_embedding, graph) - json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} + api_name = 'post_processing/create_entity_embedding' logging.info(f'Entity Embeddings created') if "enable_communities" in tasks: + api_name = 'create_communities' await asyncio.to_thread(create_communities, uri, userName, password, database) - json_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logging.info(f'created communities') - logger.log_struct(json_obj) - return create_api_response('Success', message='All tasks completed successfully') + logging.info(f'created 
communities') + graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + document_name = "" + count_response = graphDb_data_Access.update_node_relationship_count(document_name) + if count_response: + count_response = [{"filename": filename, **counts} for filename, counts in count_response.items()] + logging.info(f'Updated source node with community related counts') + end = time.time() + elapsed_time = end - start + json_obj = {'api_name': api_name, 'db_url': uri, 'userName':userName, 'database':database, 'tasks':tasks, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + # logger.log_struct(json_obj) + return create_api_response('Success', data=count_response, message='All tasks completed successfully') except Exception as e: job_status = "Failed" @@ -343,9 +368,6 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"QA_RAG called at {datetime.now()}") qa_rag_start_time = time.time() try: - payload_json_obj = {'api_name':'chat_bot', 'db_url':uri, 'userName':userName, 'database':database, 'question':question,'document_names':document_names, - 'session_id':session_id, 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") if mode == "graph": graph = Neo4jGraph( url=uri,username=userName,password=password,database=database,sanitize = True, refresh_schema=True) else: @@ -359,7 +381,8 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id,'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{total_call_time:.2f}'} + json_obj = {'api_name':'chat_bot','db_url':uri, 
'userName':userName, 'database':database, 'question':question,'document_names':document_names, + 'session_id':session_id, 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{total_call_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) @@ -376,13 +399,11 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()): try: start = time.time() - payload_json_obj = {'api_name':'chunk_entities', 'db_url':uri, 'userName':userName, 'database':database, 'nodedetails':nodedetails,'entities':entities, - 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'chunk_entities','db_url':uri, 'userName':userName, 'database':database, 'nodedetails':nodedetails,'entities':entities, + 'mode':mode, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: @@ -401,7 +422,7 @@ async def get_neighbours(uri=Form(),userName=Form(), password=Form(), database=F result = await asyncio.to_thread(get_neighbour_nodes,uri=uri, username=userName, password=password,database=database, element_id=elementId) end = time.time() elapsed_time = end - start - json_obj = 
{'api_name':'get_neighbours','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'get_neighbours', 'userName':userName, 'database':database,'db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: @@ -422,9 +443,6 @@ async def graph_query( document_names: str = Form(None), ): try: - payload_json_obj = {'api_name':'graph_query', 'db_url':uri, 'userName':userName, 'database':database, 'document_names':document_names, - 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") start = time.time() result = await asyncio.to_thread( get_graph_results, @@ -436,7 +454,7 @@ async def graph_query( ) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'graph_query','db_url':uri, 'userName':userName, 'database':database, 'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: @@ -452,10 +470,13 @@ async def graph_query( @app.post("/clear_chat_bot") async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=Form(), session_id=Form(None)): try: - payload_json_obj = {'api_name':'clear_chat_bot', 'db_url':uri, 'userName':userName, 'database':database, 'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, 
"INFO") + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(clear_chat_history,graph=graph,session_id=session_id) + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'clear_chat_bot', 'db_url':uri, 'userName':userName, 'database':database, 'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -470,13 +491,11 @@ async def clear_chat_bot(uri=Form(),userName=Form(), password=Form(), database=F async def connect(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() - payload_json_obj = {'api_name':'connect', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'connect','db_url':uri,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'connect','db_url':uri, 'userName':userName, 'database':database,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") result['elapsed_api_time'] = f'{elapsed_time:.2f}' return create_api_response('Success',data=result) @@ -493,22 +512,18 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber password=Form(), database=Form()): try: start = time.time() - payload_json_obj = {'api_name':'upload', 'db_url':uri, 
'userName':userName, 'database':database, 'chunkNumber':chunkNumber,'totalChunks':totalChunks, - 'original_file_name':originalname,'model':model, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'upload','db_url':uri,'userName':userName, 'database':database, 'chunkNumber':chunkNumber,'totalChunks':totalChunks, + 'original_file_name':originalname,'model':model, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") - # result['elapsed_api_time'] = f'{elapsed_time:.2f}' if int(chunkNumber) == int(totalChunks): return create_api_response('Success',data=result, message='Source Node Created Successfully') else: return create_api_response('Success', message=result) except Exception as e: - # job_status = "Failed" message="Unable to upload large file into chunks. 
" error_message = str(e) logging.info(message) @@ -521,14 +536,12 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() - payload_json_obj = {'api_name':'schema', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) result = await asyncio.to_thread(get_labels_and_relationtypes, graph) end = time.time() elapsed_time = end - start logging.info(f'Schema result from DB: {result}') - json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'schema','db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success', data=result,message=f"Total elapsed API time {elapsed_time:.2f}") except Exception as e: @@ -545,6 +558,11 @@ def decode_password(pwd): decoded_password = sample_string_bytes.decode("utf-8") return decoded_password +def encode_password(pwd): + data_bytes = pwd.encode('ascii') + encoded_pwd_bytes = base64.b64encode(data_bytes) + return encoded_pwd_bytes + @app.get("/update_extract_status/{file_name}") async def update_extract_status(request:Request, file_name, url, userName, password, database): async def generate(): @@ -564,7 +582,6 @@ async def generate(): graph = create_graph_database_connection(uri, userName, decoded_password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.get_current_status_document_node(file_name) - # print(f'Result of document status in SSE : {result}') if len(result) > 0: status = 
json.dumps({'fileName':file_name, 'status':result[0]['Status'], @@ -575,7 +592,13 @@ async def generate(): 'total_chunks':result[0]['total_chunks'], 'fileSize':result[0]['fileSize'], 'processed_chunk':result[0]['processed_chunk'], - 'fileSource':result[0]['fileSource'] + 'fileSource':result[0]['fileSource'], + 'chunkNodeCount' : result[0]['chunkNodeCount'], + 'chunkRelCount' : result[0]['chunkRelCount'], + 'entityNodeCount' : result[0]['entityNodeCount'], + 'entityEntityRelCount' : result[0]['entityEntityRelCount'], + 'communityNodeCount' : result[0]['communityNodeCount'], + 'communityRelCount' : result[0]['communityRelCount'] }) yield status except asyncio.CancelledError: @@ -593,9 +616,6 @@ async def delete_document_and_entities(uri=Form(), deleteEntities=Form()): try: start = time.time() - payload_json_obj = {'api_name':'delete_document_and_entities', 'db_url':uri, 'userName':userName, 'database':database, 'filenames':filenames,'deleteEntities':deleteEntities, - 'source_types':source_types, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri) @@ -603,7 +623,8 @@ async def delete_document_and_entities(uri=Form(), message = f"Deleted {files_list_size} documents with entities from database" end = time.time() elapsed_time = end - start - json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'userName':userName, 'database':database, 'filenames':filenames,'deleteEntities':deleteEntities, + 'source_types':source_types, 'logging_time': 
formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',message=message) except Exception as e: @@ -637,11 +658,17 @@ async def get_document_status(file_name, url, userName, password, database): 'total_chunks':result[0]['total_chunks'], 'fileSize':result[0]['fileSize'], 'processed_chunk':result[0]['processed_chunk'], - 'fileSource':result[0]['fileSource'] + 'fileSource':result[0]['fileSource'], + 'chunkNodeCount' : result[0]['chunkNodeCount'], + 'chunkRelCount' : result[0]['chunkRelCount'], + 'entityNodeCount' : result[0]['entityNodeCount'], + 'entityEntityRelCount' : result[0]['entityEntityRelCount'], + 'communityNodeCount' : result[0]['communityNodeCount'], + 'communityRelCount' : result[0]['communityRelCount'] } else: status = {'fileName':file_name, 'status':'Failed'} - print(f'Result of document status in refresh : {result}') + logging.info(f'Result of document status in refresh : {result}') return create_api_response('Success',message="",file_name=status) except Exception as e: message=f"Unable to get the document status" @@ -652,12 +679,14 @@ async def get_document_status(file_name, url, userName, password, database): @app.post("/cancelled_job") async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=Form(), filenames=Form(None), source_types=Form(None)): try: - payload_json_obj = {'api_name':'cancelled_job', 'db_url':uri, 'userName':userName, 'database':database, - 'filenames':filenames,'source_types':source_types,'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) result = manually_cancelled_job(graph,filenames, source_types, MERGED_DIR, uri) - + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'cancelled_job','db_url':uri, 'userName':userName, 
'database':database, 'filenames':filenames, + 'source_types':source_types, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',message=result) except Exception as e: job_status = "Failed" @@ -671,9 +700,12 @@ async def cancelled_job(uri=Form(), userName=Form(), password=Form(), database=F @app.post("/populate_graph_schema") async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None)): try: - payload_json_obj = {'api_name':'populate_graph_schema', 'model':model, 'is_schema_description_checked':is_schema_description_checked, 'input_text':input_text, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") + start = time.time() result = populate_graph_schema_from_text(input_text, model, is_schema_description_checked) + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'populate_graph_schema', 'model':model, 'is_schema_description_checked':is_schema_description_checked, 'input_text':input_text, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) except Exception as e: job_status = "Failed" @@ -687,15 +719,13 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche @app.post("/get_unconnected_nodes_list") async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form()): try: - payload_json_obj = {'api_name':'get_unconnected_nodes_list', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = 
graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes() end = time.time() elapsed_time = end - start - json_obj = {'api_name':'get_unconnected_nodes_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'get_unconnected_nodes_list','db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=nodes_list,message=total_nodes) except Exception as e: @@ -710,16 +740,13 @@ async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form( @app.post("/delete_unconnected_nodes") async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form()): try: - payload_json_obj = {'api_name':'delete_unconnected_nodes', 'db_url':uri, 'userName':userName, 'database':database, - 'unconnected_entities_list':unconnected_entities_list, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'delete_unconnected_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'delete_unconnected_nodes','db_url':uri, 'userName':userName, 'database':database,'unconnected_entities_list':unconnected_entities_list, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return 
create_api_response('Success',data=result,message="Unconnected entities delete successfully") except Exception as e: @@ -735,14 +762,12 @@ async def delete_orphan_nodes(uri=Form(), userName=Form(), password=Form(), data async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form()): try: start = time.time() - payload_json_obj = {'api_name':'get_duplicate_nodes', 'db_url':uri, 'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() end = time.time() elapsed_time = end - start - json_obj = {'api_name':'get_duplicate_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'get_duplicate_nodes','db_url':uri,'userName':userName, 'database':database, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=nodes_list, message=total_nodes) except Exception as e: @@ -758,15 +783,13 @@ async def get_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), data async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(),duplicate_nodes_list=Form()): try: start = time.time() - payload_json_obj = {'api_name':'merge_duplicate_nodes', 'db_url':uri, 'userName':userName, 'database':database, - 'duplicate_nodes_list':duplicate_nodes_list, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = 
graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list) end = time.time() elapsed_time = end - start - json_obj = {'api_name':'merge_duplicate_nodes','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + json_obj = {'api_name':'merge_duplicate_nodes','db_url':uri, 'userName':userName, 'database':database, + 'duplicate_nodes_list':duplicate_nodes_list, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result,message="Duplicate entities merged successfully") except Exception as e: @@ -781,12 +804,15 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da @app.post("/drop_create_vector_index") async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): try: - payload_json_obj = {'api_name':'drop_create_vector_index', 'db_url':uri, 'userName':userName, 'database':database, - 'isVectorIndexExist':isVectorIndexExist, 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) result = graphDb_data_Access.drop_create_vector_index(isVectorIndexExist) + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'drop_create_vector_index', 'db_url':uri, 'userName':userName, 'database':database, + 'isVectorIndexExist':isVectorIndexExist, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") return create_api_response('Success',message=result) except Exception as e: job_status = "Failed" @@ -800,13 +826,15 @@ async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), 
@app.post("/retry_processing") async def retry_processing(uri=Form(), userName=Form(), password=Form(), database=Form(), file_name=Form(), retry_condition=Form()): try: - payload_json_obj = {'api_name':'retry_processing', 'db_url':uri, 'userName':userName, 'database':database, 'file_name':file_name,'retry_condition':retry_condition, - 'logging_time': formatted_time(datetime.now(timezone.utc))} - logger.log_struct(payload_json_obj, "INFO") + start = time.time() graph = create_graph_database_connection(uri, userName, password, database) await asyncio.to_thread(set_status_retry, graph,file_name,retry_condition) - #set_status_retry(graph,file_name,retry_condition) - return create_api_response('Success',message=f"Status set to Reprocess for filename : {file_name}") + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'retry_processing', 'db_url':uri, 'userName':userName, 'database':database, 'file_name':file_name,'retry_condition':retry_condition, + 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") + return create_api_response('Success',message=f"Status set to Ready to Reprocess for filename : {file_name}") except Exception as e: job_status = "Failed" message="Unable to set status to Retry" @@ -822,21 +850,59 @@ async def calculate_metric(question: str = Form(), answer: str = Form(), model: str = Form(), mode: str = Form()): + try: + start = time.time() + context_list = [str(item).strip() for item in json.loads(context)] if context else [] + answer_list = [str(item).strip() for item in json.loads(answer)] if answer else [] + mode_list = [str(item).strip() for item in json.loads(mode)] if mode else [] + + result = await asyncio.to_thread( + get_ragas_metrics, question, context_list, answer_list, model + ) + if result is None or "error" in result: + return create_api_response( + 'Failed', + message='Failed to calculate evaluation metrics.', + 
error=result.get("error", "Ragas evaluation returned null") + ) + data = {mode: {metric: result[metric][i] for metric in result} for i, mode in enumerate(mode_list)} + end = time.time() + elapsed_time = end - start + json_obj = {'api_name':'metric', 'question':question, 'context':context, 'answer':answer, 'model':model,'mode':mode, + 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'} + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=data) + except Exception as e: + logging.exception(f"Error while calculating evaluation metrics: {e}") + return create_api_response( + 'Failed', + message="Error while calculating evaluation metrics", + error=str(e) + ) + finally: + gc.collect() + + +@app.post('/additional_metrics') +async def calculate_additional_metrics(question: str = Form(), + context: str = Form(), + answer: str = Form(), + reference: str = Form(), + model: str = Form(), + mode: str = Form(), +): try: context_list = [str(item).strip() for item in json.loads(context)] if context else [] answer_list = [str(item).strip() for item in json.loads(answer)] if answer else [] mode_list = [str(item).strip() for item in json.loads(mode)] if mode else [] - - result = await asyncio.to_thread( - get_ragas_metrics, question, context_list, answer_list, model - ) + result = await get_additional_metrics(question, context_list,answer_list, reference, model) if result is None or "error" in result: return create_api_response( 'Failed', message='Failed to calculate evaluation metrics.', error=result.get("error", "Ragas evaluation returned null") ) - data = {mode: {metric: result[metric][i] for metric in result} for i, mode in enumerate(mode_list)} + data = {mode: {metric: result[i][metric] for metric in result[i]} for i, mode in enumerate(mode_list)} return create_api_response('Success', data=data) except Exception as e: logging.exception(f"Error while calculating evaluation metrics: {e}") @@ -883,6 
+949,8 @@ async def fetch_chunktext( json_obj = { 'api_name': 'fetch_chunktext', 'db_url': uri, + 'userName': userName, + 'database': database, 'document_name': document_name, 'page_no': page_no, 'logging_time': formatted_time(datetime.now(timezone.utc)), @@ -900,5 +968,34 @@ async def fetch_chunktext( gc.collect() +@app.post("/backend_connection_configuation") +async def backend_connection_configuation(): + try: + graph = Neo4jGraph() + logging.info(f'login connection status of object: {graph}') + if graph is not None: + graph_connection = True + isURI = os.getenv('NEO4J_URI') + isUsername= os.getenv('NEO4J_USERNAME') + isDatabase= os.getenv('NEO4J_DATABASE') + isPassword= os.getenv('NEO4J_PASSWORD') + encoded_password = encode_password(isPassword) + graphDb_data_Access = graphDBdataAccess(graph) + gds_status = graphDb_data_Access.check_gds_version() + write_access = graphDb_data_Access.check_account_access(database=isDatabase) + return create_api_response('Success',message=f"Backend connection successful",data={'graph_connection':graph_connection,'uri':isURI,'user_name':isUsername,'database':isDatabase,'password':encoded_password,'gds_status':gds_status,'write_access':write_access}) + else: + graph_connection = False + return create_api_response('Success',message=f"Backend connection is not successful",data=graph_connection) + except Exception as e: + graph_connection = False + job_status = "Failed" + message="Unable to connect backend DB" + error_message = str(e) + logging.exception(f'{error_message}') + return create_api_response(job_status, message=message, error=error_message + ' or fill from the login dialog', data=graph_connection) + finally: + gc.collect() + if __name__ == "__main__": - uvicorn.run(app) + uvicorn.run(app) \ No newline at end of file diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index b7fcbd665..f50a36efb 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -4,15 +4,13 @@ import 
logging import threading -from concurrent.futures import ThreadPoolExecutor from datetime import datetime from typing import Any from dotenv import load_dotenv - -# LangChain imports -from langchain_community.vectorstores.neo4j_vector import Neo4jVector -from langchain_community.chat_message_histories import Neo4jChatMessageHistory +from langchain_neo4j import Neo4jVector +from langchain_neo4j import Neo4jChatMessageHistory +from langchain_neo4j import GraphCypherQAChain from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableBranch @@ -21,7 +19,6 @@ from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_text_splitters import TokenTextSplitter from langchain_core.messages import HumanMessage, AIMessage -from langchain.chains import GraphCypherQAChain from langchain_community.chat_message_histories import ChatMessageHistory from langchain_core.callbacks import StdOutCallbackHandler, BaseCallbackHandler @@ -38,8 +35,6 @@ from src.llm import get_llm from src.shared.common_fn import load_embedding_model from src.shared.constants import * -from src.graphDB_dataAccess import graphDBdataAccess -from src.ragas_eval import get_ragas_metrics load_dotenv() EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') @@ -278,7 +273,9 @@ def retrieve_documents(doc_retriever, messages): except Exception as e: error_message = f"Error retrieving documents: {str(e)}" logging.error(error_message) - raise RuntimeError(error_message) + docs = None + transformed_question = None + return docs,transformed_question @@ -660,7 +657,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access if document_names and not chat_mode_settings["document_filter"]: result = { "session_id": "", - "message": "This chat mode does support document selection", + "message": "Please deselect all documents in the table 
before using this chat mode", "info": { "sources": [], "model": "", diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index 31ae07496..7d23e23dd 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -74,8 +74,8 @@ def process_chunk_data(chunk_data): for chunk in record["chunks"]: chunk.update(doc_properties) if chunk["fileSource"] == "youtube": - chunk["start_time"] = min(time_to_seconds(chunk["start_time"]),time_to_seconds(chunk["end_time"])) - chunk["end_time"] = time_to_seconds(chunk["end_time"]) + chunk["start_time"] = min(time_to_seconds(chunk.get('start_time',0)),time_to_seconds(chunk.get("end_time",0))) + chunk["end_time"] = time_to_seconds(chunk.get("end_time",0)) chunk_properties.append(chunk) return chunk_properties diff --git a/backend/src/create_chunks.py b/backend/src/create_chunks.py index 621785a31..d5e93d14f 100644 --- a/backend/src/create_chunks.py +++ b/backend/src/create_chunks.py @@ -1,8 +1,7 @@ from langchain_text_splitters import TokenTextSplitter from langchain.docstore.document import Document -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph import logging -import os from src.document_sources.youtube import get_chunks_with_timestamps, get_calculated_timestamps import re @@ -25,7 +24,6 @@ def split_file_into_chunks(self): A list of chunks each of which is a langchain Document. 
""" logging.info("Split file into smaller chunks") - # number_of_chunks_allowed = int(os.environ.get('NUMBER_OF_CHUNKS_ALLOWED')) text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20) if 'page' in self.pages[0].metadata: chunks = [] diff --git a/backend/src/diffbot_transformer.py b/backend/src/diffbot_transformer.py index a8e8db3fb..e16e54efb 100644 --- a/backend/src/diffbot_transformer.py +++ b/backend/src/diffbot_transformer.py @@ -1,5 +1,6 @@ from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -from langchain_community.graphs import Neo4jGraph +#from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain.docstore.document import Document from typing import List import os diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py index 91830f591..3aaf42e12 100644 --- a/backend/src/document_sources/gcs_bucket.py +++ b/backend/src/document_sources/gcs_bucket.py @@ -59,8 +59,14 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g if access_token is None: storage_client = storage.Client(project=gcs_project_id) - loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=load_document_content) - pages = loader.load() + bucket = storage_client.bucket(gcs_bucket_name) + blob = bucket.blob(blob_name) + + if blob.exists(): + loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=load_document_content) + pages = loader.load() + else : + raise Exception('File does not exist, Please re-upload the file and try again.') else: creds= Credentials(access_token) storage_client = storage.Client(project=gcs_project_id, credentials=creds) @@ -77,7 +83,7 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g text += page.extract_text() pages = [Document(page_content = text)] else: - raise 
Exception('Blob Not Found') + raise Exception(f'File Not Found in GCS bucket - {gcs_bucket_name}') return gcs_blob_filename, pages def upload_file_to_gcs(file_chunk, chunk_number, original_file_name, bucket_name, folder_name_sha1_hashed): @@ -101,15 +107,12 @@ def merge_file_gcs(bucket_name, original_file_name: str, folder_name_sha1_hashed try: storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - # Retrieve chunks from GCS - # blobs = storage_client.list_blobs(bucket_name, prefix=folder_name_sha1_hashed) - # print(f'before sorted blobs: {blobs}') chunks = [] for i in range(1,total_chunks+1): blob_name = folder_name_sha1_hashed + '/' + f"{original_file_name}_part_{i}" blob = bucket.blob(blob_name) if blob.exists(): - print(f'Blob Name: {blob.name}') + logging.info(f'Blob Name: {blob.name}') chunks.append(blob.download_as_bytes()) blob.delete() @@ -146,7 +149,8 @@ def copy_failed_file(source_bucket_name,dest_bucket_name,folder_name, file_name) dest_bucket = storage_client.bucket(dest_bucket_name) folder_file_name = folder_name +'/'+file_name source_blob = source_bucket.blob(folder_file_name) - source_bucket.copy_blob(source_blob, dest_bucket, file_name) - logging.info(f'Failed file {file_name} copied to {dest_bucket_name} from {source_bucket_name} in GCS successfully') + if source_blob.exists(): + source_bucket.copy_blob(source_blob, dest_bucket, file_name) + logging.info(f'Failed file {file_name} copied to {dest_bucket_name} from {source_bucket_name} in GCS successfully') except Exception as e: raise Exception(e) diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py index ed46210f4..3d5bc08db 100644 --- a/backend/src/document_sources/local_file.py +++ b/backend/src/document_sources/local_file.py @@ -20,10 +20,8 @@ def load_document_content(file_path): if Path(file_path).suffix.lower() == '.pdf': - print("in if") return PyMuPDFLoader(file_path) else: - print("in else") return 
UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True) def get_documents_from_file_by_path(file_path,file_name): diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index e30de301e..dee97e230 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -1,25 +1,16 @@ -from pathlib import Path from langchain.docstore.document import Document -from langchain_community.document_loaders import YoutubeLoader -from pytube import YouTube from youtube_transcript_api import YouTubeTranscriptApi import logging from urllib.parse import urlparse,parse_qs from difflib import SequenceMatcher from datetime import timedelta -from langchain_community.document_loaders.youtube import TranscriptFormat from src.shared.constants import YOUTUBE_CHUNK_SIZE_SECONDS from typing import List, Dict, Any import os import re -from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader def get_youtube_transcript(youtube_id): try: - #transcript = YouTubeTranscriptApi.get_transcript(youtube_id) - # transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) - # transcript = transcript_list.find_transcript(["en"]) - # transcript_pieces: List[Dict[str, Any]] = transcript.fetch() proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY") proxies = { 'https': proxy } transcript_pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies = proxies) @@ -28,21 +19,12 @@ def get_youtube_transcript(youtube_id): message = f"Youtube transcript is not available for youtube Id: {youtube_id}" raise Exception(message) -# def get_youtube_combined_transcript(youtube_id): -# try: -# transcript_dict = get_youtube_transcript(youtube_id) -# transcript = YouTubeTranscriptApi.get_transcript(youtube_id) -# return transcript -# except Exception as e: -# message = f"Youtube transcript is not available for youtube Id: {youtube_id}" -# raise Exception(message) - def 
get_youtube_combined_transcript(youtube_id): try: transcript_dict = get_youtube_transcript(youtube_id) transcript='' for td in transcript_dict: - transcript += ''.join(td['text']) + transcript += ''.join(td['text'])+" " return transcript except Exception as e: message = f"Youtube transcript is not available for youtube Id: {youtube_id}" @@ -64,28 +46,20 @@ def create_youtube_url(url): def get_documents_from_youtube(url): try: match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',url) - # youtube_loader = YoutubeLoader.from_youtube_url(url, - # language=["en-US", "en-gb", "en-ca", "en-au","zh-CN", "zh-Hans", "zh-TW", "fr-FR","de-DE","it-IT","ja-JP","pt-BR","ru-RU","es-ES"], - # translation = "en", - # add_video_info=True, - # transcript_format=TranscriptFormat.CHUNKS, - # chunk_size_seconds=YOUTUBE_CHUNK_SIZE_SECONDS) - # video_id = parse_qs(urlparse(url).query).get('v') - # cred_path = os.path.join(os.getcwd(),"llm-experiments_credentials.json") - # print(f'Credential file path on youtube.py {cred_path}') - # google_api_client = GoogleApiClient(service_account_path=Path(cred_path)) - # youtube_loader_channel = GoogleApiYoutubeLoader( - # google_api_client=google_api_client, - # video_ids=[video_id[0].strip()], add_video_info=True - # ) - # youtube_transcript = youtube_loader_channel.load() - # pages = youtube_loader.load() - # print(f'youtube page_content: {youtube_transcript[0].page_content}') - # print(f'youtube id: {youtube_transcript[0].metadata["id"]}') - # print(f'youtube title: {youtube_transcript[0].metadata["snippet"]["title"]}') - transcript= get_youtube_combined_transcript(match.group(1)) + transcript= get_youtube_transcript(match.group(1)) + transcript_content='' + counter = YOUTUBE_CHUNK_SIZE_SECONDS + pages = [] + for i, td in enumerate(transcript): + if td['start'] < counter: + transcript_content += ''.join(td['text'])+" " + else : + transcript_content += ''.join(td['text'])+" " + pages.append(Document(page_content=transcript_content.strip(), 
metadata={'start_timestamp':str(timedelta(seconds = counter-YOUTUBE_CHUNK_SIZE_SECONDS)).split('.')[0], 'end_timestamp':str(timedelta(seconds = td['start'])).split('.')[0]})) + counter += YOUTUBE_CHUNK_SIZE_SECONDS + transcript_content='' + pages.append(Document(page_content=transcript_content.strip(), metadata={'start_timestamp':str(timedelta(seconds = counter-YOUTUBE_CHUNK_SIZE_SECONDS)).split('.')[0], 'end_timestamp':str(timedelta(seconds =transcript[-1]['start'] if transcript else counter)).split('.')[0]})) # Handle empty transcript_pieces file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - pages = [Document(page_content=transcript)] return file_name, pages except Exception as e: error_message = str(e) diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 82391d1e4..aa9034f4c 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -1,10 +1,9 @@ import logging import os -from datetime import datetime -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from src.shared.common_fn import create_gcs_bucket_folder_name_hashed, delete_uploaded_local_file, load_embedding_model from src.document_sources.gcs_bucket import delete_file_from_gcs -from src.shared.constants import BUCKET_UPLOAD +from src.shared.constants import BUCKET_UPLOAD,NODEREL_COUNT_QUERY_WITH_COMMUNITY, NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY from src.entities.source_node import sourceNode from src.communities import MAX_COMMUNITY_LEVELS import json @@ -103,7 +102,7 @@ def update_source_node(self, obj_source_node:sourceNode): param= {"props":params} - print(f'Base Param value 1 : {param}') + logging.info(f'Base Param value 1 : {param}') query = "MERGE(d:Document {fileName :$props.fileName}) SET d += $props" logging.info("Update source node properties") self.graph.query(query,param) @@ -187,14 +186,11 @@ def check_gds_version(self): result = 
self.graph.query(gds_procedure_count) total_gds_procedures = result[0]['totalGdsProcedures'] if result else 0 - enable_communities = os.environ.get('ENABLE_COMMUNITIES','').upper() == "TRUE" - logging.info(f"Enable Communities {enable_communities}") - - if enable_communities and total_gds_procedures > 0: + if total_gds_procedures > 0: logging.info("GDS is available in the database.") return True else: - logging.info("Communities are disabled or GDS is not available in the database.") + logging.info("GDS is not available in the database.") return False except Exception as e: logging.error(f"An error occurred while checking GDS version: {e}") @@ -249,7 +245,13 @@ def get_current_status_document_node(self, file_name): MATCH(d:Document {fileName : $file_name}) RETURN d.status AS Status , d.processingTime AS processingTime, d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount, d.total_chunks AS total_chunks , d.fileSize as fileSize, - d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource + d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource, + d.chunkNodeCount AS chunkNodeCount, + d.chunkRelCount AS chunkRelCount, + d.entityNodeCount AS entityNodeCount, + d.entityEntityRelCount AS entityEntityRelCount, + d.communityNodeCount AS communityNodeCount, + d.communityRelCount AS communityRelCount """ param = {"file_name" : file_name} return self.execute_query(query, param) @@ -277,27 +279,33 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me return count(*) as deletedChunks """ query_to_delete_document_and_entities=""" - match (d:Document) where d.fileName IN $filename_list and d.fileSource in $source_types_list - detach delete d - with collect(d) as documents - unwind documents as d - match (d)<-[:PART_OF]-(c:Chunk) - detach delete c - with * - match (c)-[:HAS_ENTITY]->(e) - where not exists { 
(e)<-[:HAS_ENTITY]-()-[:PART_OF]->(d2) where not d2 in documents } - detach delete e - """ + MATCH (d:Document) + WHERE d.fileName IN $filename_list AND d.fileSource IN $source_types_list + WITH COLLECT(d) AS documents + UNWIND documents AS d + OPTIONAL MATCH (d)<-[:PART_OF]-(c:Chunk) + OPTIONAL MATCH (c:Chunk)-[:HAS_ENTITY]->(e) + WITH d, c, e, documents + WHERE NOT EXISTS { + MATCH (e)<-[:HAS_ENTITY]-(c2)-[:PART_OF]->(d2:Document) + WHERE NOT d2 IN documents + } + WITH d, COLLECT(c) AS chunks, COLLECT(e) AS entities + FOREACH (chunk IN chunks | DETACH DELETE chunk) + FOREACH (entity IN entities | DETACH DELETE entity) + DETACH DELETE d + """ query_to_delete_communities = """ - MATCH (c:`__Community__`) - WHERE NOT EXISTS { ()-[:IN_COMMUNITY]->(c) } AND c.level = 0 - DETACH DELETE c - - WITH * - UNWIND range(1, $max_level) AS level - MATCH (c:`__Community__`) - WHERE c.level = level AND NOT EXISTS { (c)<-[:PARENT_COMMUNITY]-(child) } + MATCH (c:`__Community__`) + WHERE c.level = 0 AND NOT EXISTS { ()-[:IN_COMMUNITY]->(c) } DETACH DELETE c + WITH 1 AS dummy + UNWIND range(1, $max_level) AS level + CALL (level) { + MATCH (c:`__Community__`) + WHERE c.level = level AND NOT EXISTS { ()-[:PARENT_COMMUNITY]->(c) } + DETACH DELETE c + } """ param = {"filename_list" : filename_list, "source_types_list": source_types_list} community_param = {"max_level":MAX_COMMUNITY_LEVELS} @@ -402,7 +410,7 @@ def get_duplicate_nodes_list(self): def merge_duplicate_nodes(self,duplicate_nodes_list): nodes_list = json.loads(duplicate_nodes_list) - print(f'Nodes list to merge {nodes_list}') + logging.info(f'Nodes list to merge {nodes_list}') query = """ UNWIND $rows AS row CALL { with row @@ -441,3 +449,65 @@ def drop_create_vector_index(self, isVectorIndexExist): } ) return "Drop and Re-Create vector index succesfully" + + + def update_node_relationship_count(self,document_name): + logging.info("updating node and relationship count") + label_query = """CALL db.labels""" + community_flag = 
{'label': '__Community__'} in self.execute_query(label_query) + if (not document_name) and (community_flag): + result = self.execute_query(NODEREL_COUNT_QUERY_WITH_COMMUNITY) + elif (not document_name) and (not community_flag): + return [] + else: + param = {"document_name": document_name} + result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param) + response = {} + for record in result: + filename = record["filename"] + chunkNodeCount = record["chunkNodeCount"] + chunkRelCount = record["chunkRelCount"] + entityNodeCount = record["entityNodeCount"] + entityEntityRelCount = record["entityEntityRelCount"] + if (not document_name) and (community_flag): + communityNodeCount = record["communityNodeCount"] + communityRelCount = record["communityRelCount"] + else: + communityNodeCount = 0 + communityRelCount = 0 + nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount) + relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount) + update_query = """ + MATCH (d:Document {fileName: $filename}) + SET d.chunkNodeCount = $chunkNodeCount, + d.chunkRelCount = $chunkRelCount, + d.entityNodeCount = $entityNodeCount, + d.entityEntityRelCount = $entityEntityRelCount, + d.communityNodeCount = $communityNodeCount, + d.communityRelCount = $communityRelCount, + d.nodeCount = $nodeCount, + d.relationshipCount = $relationshipCount + """ + self.execute_query(update_query,{ + "filename": filename, + "chunkNodeCount": chunkNodeCount, + "chunkRelCount": chunkRelCount, + "entityNodeCount": entityNodeCount, + "entityEntityRelCount": entityEntityRelCount, + "communityNodeCount": communityNodeCount, + "communityRelCount": communityRelCount, + "nodeCount" : nodeCount, + "relationshipCount" : relationshipCount + }) + + response[filename] = {"chunkNodeCount": chunkNodeCount, + "chunkRelCount": chunkRelCount, + "entityNodeCount": entityNodeCount, + "entityEntityRelCount": entityEntityRelCount, + "communityNodeCount": 
communityNodeCount, + "communityRelCount": communityRelCount, + "nodeCount" : nodeCount, + "relationshipCount" : relationshipCount + } + + return response \ No newline at end of file diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 86739ba6c..dc5a64a2c 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -207,8 +207,6 @@ def get_graph_results(uri, username, password,database,document_names): document_nodes = extract_node_elements(records) document_relationships = extract_relationships(records) - print(query) - logging.info(f"no of nodes : {len(document_nodes)}") logging.info(f"no of relations : {len(document_relationships)}") result = { diff --git a/backend/src/llm.py b/backend/src/llm.py index 93ee0f08f..f19648ed6 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -6,18 +6,13 @@ from langchain_groq import ChatGroq from langchain_google_vertexai import HarmBlockThreshold, HarmCategory from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -import concurrent.futures -from concurrent.futures import ThreadPoolExecutor from langchain_experimental.graph_transformers import LLMGraphTransformer -from langchain_core.prompts import ChatPromptTemplate from langchain_anthropic import ChatAnthropic from langchain_fireworks import ChatFireworks from langchain_aws import ChatBedrock from langchain_community.chat_models import ChatOllama import boto3 import google.auth -from src.shared.constants import MODEL_VERSIONS, PROMPT_TO_ALL_LLMs - def get_llm(model: str): """Retrieve the specified language model based on the model name.""" @@ -28,7 +23,6 @@ def get_llm(model: str): if "gemini" in model: model_name = env_value credentials, project_id = google.auth.default() - #model_name = MODEL_VERSIONS[model] llm = ChatVertexAI( model_name=model_name, #convert_system_message_to_human=True, @@ -44,7 +38,6 @@ def get_llm(model: str): }, ) elif "openai" in model: - #model_name = MODEL_VERSIONS[model] 
model_name, api_key = env_value.split(",") llm = ChatOpenAI( api_key=api_key, @@ -144,6 +137,16 @@ def get_combined_chunks(chunkId_chunkDoc_list): ) return combined_chunk_document_list +def get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list): + combined_chunk_document_list = [ + Document( + page_content=document["chunk_doc"].page_content, + metadata={"chunk_id": [document["chunk_id"]]}, + ) + for document in chunkId_chunkDoc_list + ] + return combined_chunk_document_list + async def get_graph_document_list( llm, combined_chunk_document_list, allowedNodes, allowedRelationship @@ -166,20 +169,7 @@ async def get_graph_document_list( allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, ignore_tool_usage=True, - #prompt = ChatPromptTemplate.from_messages(["system",PROMPT_TO_ALL_LLMs]) ) - # with ThreadPoolExecutor(max_workers=10) as executor: - # for chunk in combined_chunk_document_list: - # chunk_doc = Document( - # page_content=chunk.page_content.encode("utf-8"), metadata=chunk.metadata - # ) - # futures.append( - # executor.submit(llm_transformer.convert_to_graph_documents, [chunk_doc]) - # ) - - # for i, future in enumerate(concurrent.futures.as_completed(futures)): - # graph_document = future.result() - # graph_document_list.append(graph_document[0]) if isinstance(llm,DiffbotGraphTransformer): graph_document_list = llm_transformer.convert_to_graph_documents(combined_chunk_document_list) @@ -192,6 +182,7 @@ async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowed llm, model_name = get_llm(model) combined_chunk_document_list = get_combined_chunks(chunkId_chunkDoc_list) + #combined_chunk_document_list = get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list) if allowedNodes is None or allowedNodes=="": allowedNodes =[] diff --git a/backend/src/main.py b/backend/src/main.py index 05f83f073..5ef1e4354 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1,4 +1,4 @@ -from langchain_community.graphs import Neo4jGraph +from 
langchain_neo4j import Neo4jGraph from src.shared.constants import (BUCKET_UPLOAD, PROJECT_ID, QUERY_TO_GET_CHUNKS, QUERY_TO_DELETE_EXISTING_ENTITIES, QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, @@ -8,7 +8,6 @@ DELETE_ENTITIES_AND_START_FROM_BEGINNING, QUERY_TO_GET_NODES_AND_RELATIONS_OF_A_DOCUMENT) from src.shared.schema_extraction import schema_extraction_from_text -from langchain_community.document_loaders import GoogleApiClient, GoogleApiYoutubeLoader from dotenv import load_dotenv from datetime import datetime import logging @@ -27,7 +26,6 @@ import re from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader import warnings -from pytube import YouTube import sys import shutil import urllib.parse @@ -142,34 +140,13 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) logging.info(f"match value: {match}") - # file_path = os.path.join(os.path.dirname(__file__),"llm-experiments_credentials.json") - # logging.info(f'file path {file_path}') - - # if os.path.exists(file_path): - # logging.info("File path exist") - # with open(file_path,'r') as file: - # data = json.load(file) - # # logging.info(f"Project id : {data['project_id']}") - # # logging.info(f"Universal domain: {data['universe_domain']}") - # else: - # logging.warning("credntial file path not exist") - video_id = parse_qs(urlparse(youtube_url).query).get('v') - - # google_api_client = GoogleApiClient(service_account_path=Path(file_path)) - # youtube_loader_channel = GoogleApiYoutubeLoader( - # google_api_client=google_api_client, - # video_ids=[video_id[0].strip()], add_video_info=True - # ) - # youtube_transcript = youtube_loader_channel.load() - # page_content = youtube_transcript[0].page_content - - obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] - #obj_source_node.file_name = 
YouTube(youtube_url).title + obj_source_node.file_name = match.group(1) transcript= get_youtube_combined_transcript(match.group(1)) - print(transcript) + logging.info(f"Youtube transcript : {transcript}") if transcript==None or len(transcript)==0: message = f"Youtube transcript is not available for : {obj_source_node.file_name}" + logging.info(f"Youtube transcript is not available for : {obj_source_node.file_name}") raise Exception(message) else: obj_source_node.file_size = sys.getsizeof(transcript) @@ -212,7 +189,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type async def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship, retry_condition): logging.info(f'Process file name :{fileName}') - if retry_condition is None: + if not retry_condition: gcs_file_cache = os.environ.get('GCS_FILE_CACHE') if gcs_file_cache == 'True': folder_name = create_gcs_bucket_folder_name_hashed(uri, fileName) @@ -226,7 +203,7 @@ async def extract_graph_from_file_local_file(uri, userName, password, database, return await processing_source(uri, userName, password, database, model, fileName, [], allowedNodes, allowedRelationship, True, merged_file_path, retry_condition) async def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: if(aws_access_key_id==None or aws_secret_access_key==None): raise Exception('Please provide AWS access and secret keys') else: @@ -240,9 +217,8 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_web_page(uri, userName, password, database, 
model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_web_page(source_url) - if pages==None or len(pages)==0: raise Exception(f'Content is not available for given URL : {file_name}') return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship) @@ -250,7 +226,7 @@ async def extract_graph_from_web_page(uri, userName, password, database, model, return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_youtube(source_url) if pages==None or len(pages)==0: @@ -260,7 +236,7 @@ async def extract_graph_from_file_youtube(uri, userName, password, database, mod return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_Wikipedia(wiki_query, language) if pages==None or len(pages)==0: raise Exception(f'Wikipedia page is not available for file : {file_name}') @@ -269,7 +245,7 @@ async def extract_graph_from_file_Wikipedia(uri, userName, password, database, m return await processing_source(uri, userName, password, database, model, file_name,[], allowedNodes, allowedRelationship, retry_condition=retry_condition) async def extract_graph_from_file_gcs(uri, userName, password, database, model, 
gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition): - if retry_condition is None: + if not retry_condition: file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or len(pages)==0: raise Exception(f'File content is not available for file : {file_name}') @@ -303,7 +279,6 @@ async def processing_source(uri, userName, password, database, model, file_name, logging.info(f'Time taken database connection: {elapsed_create_connection:.2f} seconds') uri_latency["create_connection"] = f'{elapsed_create_connection:.2f}' graphDb_data_Access = graphDBdataAccess(graph) - start_get_chunkId_chunkDoc_list = time.time() total_chunks, chunkId_chunkDoc_list = get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition) end_get_chunkId_chunkDoc_list = time.time() @@ -341,6 +316,7 @@ async def processing_source(uri, userName, password, database, model, file_name, start_update_source_node = time.time() graphDb_data_Access.update_source_node(obj_source_node) + count_response = graphDb_data_Access.update_node_relationship_count(file_name) end_update_source_node = time.time() elapsed_update_source_node = end_update_source_node - start_update_source_node logging.info(f'Time taken to update the document source node: {elapsed_update_source_node:.2f} seconds') @@ -389,6 +365,7 @@ async def processing_source(uri, userName, password, database, model, file_name, obj_source_node.node_count = node_count obj_source_node.relationship_count = rel_count graphDb_data_Access.update_source_node(obj_source_node) + count_response = graphDb_data_Access.update_node_relationship_count(file_name) result = graphDb_data_Access.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] @@ -404,6 +381,7 @@ async def processing_source(uri, userName, password, database, model, file_name, 
obj_source_node.processing_time = processed_time graphDb_data_Access.update_source_node(obj_source_node) + count_response = graphDb_data_Access.update_node_relationship_count(file_name) logging.info('Updated the nodeCount and relCount properties in Document node') logging.info(f'file:{file_name} extraction has been completed') @@ -496,12 +474,10 @@ async def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, node_count += len(distinct_nodes) rel_count += len(relations) - print(f'node count internal func:{node_count}') - print(f'relation count internal func:{rel_count}') return node_count,rel_count,latency_processing_chunk def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): - if retry_condition is None: + if not retry_condition: logging.info("Break down file into chunks") bad_chars = ['"', "\n", "'"] for i in range(0,len(pages)): @@ -521,8 +497,8 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): chunkId_chunkDoc_list=[] chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename":file_name}) - if chunks[0]['text'] is None or chunks[0]['text']=="" : - raise Exception(f"Chunks are not created for {file_name}. Please re-upload file and try.") + if chunks[0]['text'] is None or chunks[0]['text']=="" or not chunks : + raise Exception(f"Chunks are not created for {file_name}. 
Please re-upload file and try again.") else: for chunk in chunks: chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']}) @@ -531,15 +507,16 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition): if retry_condition == START_FROM_LAST_PROCESSED_POSITION: logging.info(f"Retry : start_from_last_processed_position") starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name}) - if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): + + if starting_chunk and starting_chunk[0]["position"] < len(chunkId_chunkDoc_list): return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] - elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): + elif starting_chunk and starting_chunk[0]["position"] == len(chunkId_chunkDoc_list): starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name}) return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:] else: - raise Exception(f"All chunks of {file_name} are alreday processed. If you want to re-process, Please start from begnning") + raise Exception(f"All chunks of file are alreday processed. 
If you want to re-process, Please start from begnning") else: logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}") @@ -678,6 +655,7 @@ def manually_cancelled_job(graph, filenames, source_types, merged_dir, uri): obj_source_node.updated_at = datetime.now() graphDb_data_Access = graphDBdataAccess(graph) graphDb_data_Access.update_source_node(obj_source_node) + count_response = graphDb_data_Access.update_node_relationship_count(file_name) obj_source_node = None merged_file_path = os.path.join(merged_dir, file_name) if source_type == 'local file' and gcs_file_cache == 'True': @@ -705,7 +683,7 @@ def populate_graph_schema_from_text(text, model, is_schema_description_cheked): def set_status_retry(graph, file_name, retry_condition): graphDb_data_Access = graphDBdataAccess(graph) obj_source_node = sourceNode() - status = "Reprocess" + status = "Ready to Reprocess" obj_source_node.file_name = file_name obj_source_node.status = status obj_source_node.retry_condition = retry_condition diff --git a/backend/src/make_relationships.py b/backend/src/make_relationships.py index 1ea2729e5..7d079fcf3 100644 --- a/backend/src/make_relationships.py +++ b/backend/src/make_relationships.py @@ -1,4 +1,4 @@ -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain.docstore.document import Document from src.shared.common_fn import load_embedding_model import logging diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 7746df3d0..02fc7fb06 100644 --- a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -1,7 +1,7 @@ from neo4j import GraphDatabase import logging import time -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph import os from src.shared.common_fn import load_embedding_model diff --git a/backend/src/ragas_eval.py b/backend/src/ragas_eval.py index 8052cb9a2..c9f447242 100644 --- a/backend/src/ragas_eval.py +++ 
b/backend/src/ragas_eval.py @@ -7,6 +7,13 @@ from ragas import evaluate from ragas.metrics import answer_relevancy, faithfulness from src.shared.common_fn import load_embedding_model +from ragas.dataset_schema import SingleTurnSample +from ragas.metrics import RougeScore, SemanticSimilarity, ContextEntityRecall +from ragas.llms import LangchainLLMWrapper +from ragas.embeddings import LangchainEmbeddingsWrapper +import nltk + +nltk.download('punkt') load_dotenv() EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL") @@ -52,3 +59,41 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str): except Exception as e: logging.exception(f"Error during metrics evaluation: {e}") return {"error": str(e)} + + +async def get_additional_metrics(question: str, contexts: list, answers: list, reference: str, model_name: str): + """Calculates multiple metrics for given question, answers, contexts, and reference.""" + try: + if ("diffbot" in model_name) or ("ollama" in model_name): + raise ValueError(f"Unsupported model for evaluation: {model_name}") + llm, model_name = get_llm(model=model_name) + ragas_llm = LangchainLLMWrapper(llm) + embeddings = EMBEDDING_FUNCTION + embedding_model = LangchainEmbeddingsWrapper(embeddings=embeddings) + rouge_scorer = RougeScore() + semantic_scorer = SemanticSimilarity() + entity_recall_scorer = ContextEntityRecall() + entity_recall_scorer.llm = ragas_llm + semantic_scorer.embeddings = embedding_model + metrics = [] + for response, context in zip(answers, contexts): + sample = SingleTurnSample(response=response, reference=reference) + rouge_score = await rouge_scorer.single_turn_ascore(sample) + rouge_score = round(rouge_score,4) + semantic_score = await semantic_scorer.single_turn_ascore(sample) + semantic_score = round(semantic_score, 4) + if "gemini" in model_name: + entity_recall_score = "Not Available" + else: + entity_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context]) + entity_recall_score = 
await entity_recall_scorer.single_turn_ascore(entity_sample) + entity_recall_score = round(entity_recall_score, 4) + metrics.append({ + "rouge_score": rouge_score, + "semantic_score": semantic_score, + "context_entity_recall_score": entity_recall_score + }) + return metrics + except Exception as e: + logging.exception("Error in get_additional_metrics") + return {"error": str(e)} \ No newline at end of file diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 6d24912c7..0c0b4bea1 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -1,25 +1,15 @@ import hashlib import logging from src.document_sources.youtube import create_youtube_url -from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings +from langchain_huggingface import HuggingFaceEmbeddings from langchain_google_vertexai import VertexAIEmbeddings from langchain_openai import OpenAIEmbeddings -from langchain.docstore.document import Document -from langchain_community.graphs import Neo4jGraph +from langchain_neo4j import Neo4jGraph from langchain_community.graphs.graph_document import GraphDocument from typing import List import re import os from pathlib import Path -from langchain_openai import ChatOpenAI -from langchain_google_vertexai import ChatVertexAI -from langchain_groq import ChatGroq -from langchain_google_vertexai import HarmBlockThreshold, HarmCategory -from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer -# from neo4j.debug import watch - -# watch("neo4j") - def check_url_source(source_type, yt_url:str=None, wiki_query:str=None): language='' @@ -86,15 +76,15 @@ def load_embedding_model(embedding_model_name: str): dimension = 768 logging.info(f"Embedding: Using Vertex AI Embeddings , Dimension:{dimension}") else: - embeddings = SentenceTransformerEmbeddings( + embeddings = HuggingFaceEmbeddings( model_name="all-MiniLM-L6-v2"#, cache_folder="/embedding_model" ) 
dimension = 384 - logging.info(f"Embedding: Using SentenceTransformer , Dimension:{dimension}") + logging.info(f"Embedding: Using Langchain HuggingFaceEmbeddings , Dimension:{dimension}") return embeddings, dimension def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]): - graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True) + graph.add_graph_documents(graph_document_list, baseEntityLabel=True) # graph.add_graph_documents(graph_document_list) def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]): diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index 084b5d1ba..8307dc3c9 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -1,14 +1,4 @@ -MODEL_VERSIONS = { - "openai-gpt-3.5": "gpt-3.5-turbo-0125", - "gemini-1.0-pro": "gemini-1.0-pro-001", - "gemini-1.5-pro": "gemini-1.5-pro-002", - "gemini-1.5-flash": "gemini-1.5-flash-002", - "openai-gpt-4": "gpt-4-turbo-2024-04-09", - "diffbot" : "gpt-4-turbo-2024-04-09", - "openai-gpt-4o-mini": "gpt-4o-mini-2024-07-18", - "openai-gpt-4o":"gpt-4o-2024-08-06", - "groq-llama3" : "llama3-70b-8192" - } + OPENAI_MODELS = ["openai-gpt-3.5", "openai-gpt-4o", "openai-gpt-4o-mini"] GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash"] GROQ_MODELS = ["groq-llama3"] @@ -174,6 +164,90 @@ LIMIT $limit """ +NODEREL_COUNT_QUERY_WITH_COMMUNITY = """ +MATCH (d:Document) +WHERE d.fileName IS NOT NULL +OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) +OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) +OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) +OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) +OPTIONAL MATCH (e)-[ic:IN_COMMUNITY]->(comm:__Community__) +OPTIONAL MATCH (comm)-[pc1:PARENT_COMMUNITY]->(first_level:__Community__) +OPTIONAL MATCH (first_level)-[pc2:PARENT_COMMUNITY]->(second_level:__Community__) +OPTIONAL MATCH 
(second_level)-[pc3:PARENT_COMMUNITY]->(third_level:__Community__) +WITH + d.fileName AS filename, + count(DISTINCT c) AS chunkNodeCount, + count(DISTINCT po) AS partOfRelCount, + count(DISTINCT he) AS hasEntityRelCount, + count(DISTINCT sim) AS similarRelCount, + count(DISTINCT nc) AS nextChunkRelCount, + count(DISTINCT e) AS entityNodeCount, + collect(DISTINCT e) AS entities, + count(DISTINCT comm) AS baseCommunityCount, + count(DISTINCT first_level) AS firstlevelcommCount, + count(DISTINCT second_level) AS secondlevelcommCount, + count(DISTINCT third_level) AS thirdlevelcommCount, + count(DISTINCT ic) AS inCommunityCount, + count(DISTINCT pc1) AS parentCommunityRelCount1, + count(DISTINCT pc2) AS parentCommunityRelCount2, + count(DISTINCT pc3) AS parentCommunityRelCount3 +WITH + filename, + chunkNodeCount, + partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, + entityNodeCount, + entities, + baseCommunityCount + firstlevelcommCount + secondlevelcommCount + thirdlevelcommCount AS commCount, + inCommunityCount + parentCommunityRelCount1 + parentCommunityRelCount2 + parentCommunityRelCount3 AS communityRelCount +CALL (entities) { + UNWIND entities AS e + RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount +} +RETURN + filename, + COALESCE(chunkNodeCount, 0) AS chunkNodeCount, + COALESCE(chunkRelCount, 0) AS chunkRelCount, + COALESCE(entityNodeCount, 0) AS entityNodeCount, + COALESCE(entityEntityRelCount, 0) AS entityEntityRelCount, + COALESCE(commCount, 0) AS communityNodeCount, + COALESCE(communityRelCount, 0) AS communityRelCount +""" +NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY = """ +MATCH (d:Document) +WHERE d.fileName = $document_name +OPTIONAL MATCH (d)<-[po:PART_OF]-(c:Chunk) +OPTIONAL MATCH (c)-[he:HAS_ENTITY]->(e:__Entity__) +OPTIONAL MATCH (c)-[sim:SIMILAR]->(c2:Chunk) +OPTIONAL MATCH (c)-[nc:NEXT_CHUNK]->(c3:Chunk) +WITH + d.fileName AS filename, + count(DISTINCT c) AS chunkNodeCount, 
+ count(DISTINCT po) AS partOfRelCount, + count(DISTINCT he) AS hasEntityRelCount, + count(DISTINCT sim) AS similarRelCount, + count(DISTINCT nc) AS nextChunkRelCount, + count(DISTINCT e) AS entityNodeCount, + collect(DISTINCT e) AS entities +WITH + filename, + chunkNodeCount, + partOfRelCount + hasEntityRelCount + similarRelCount + nextChunkRelCount AS chunkRelCount, + entityNodeCount, + entities +CALL (entities) { + UNWIND entities AS e + RETURN sum(COUNT { (e)-->(e2:__Entity__) WHERE e2 in entities }) AS entityEntityRelCount +} +RETURN + filename, + COALESCE(chunkNodeCount, 0) AS chunkNodeCount, + COALESCE(chunkRelCount, 0) AS chunkRelCount, + COALESCE(entityNodeCount, 0) AS entityNodeCount, + COALESCE(entityEntityRelCount, 0) AS entityEntityRelCount +""" + + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 diff --git a/backend/src/shared/schema_extraction.py b/backend/src/shared/schema_extraction.py index 80954ba65..1b7f76c92 100644 --- a/backend/src/shared/schema_extraction.py +++ b/backend/src/shared/schema_extraction.py @@ -2,7 +2,6 @@ #from langchain_core.pydantic_v1 import BaseModel, Field from pydantic.v1 import BaseModel, Field from src.llm import get_llm -from src.shared.constants import MODEL_VERSIONS from langchain_core.prompts import ChatPromptTemplate class Schema(BaseModel): diff --git a/docker-compose.yml b/docker-compose.yml index 8a0fdc4b2..ea074f50b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -63,11 +63,12 @@ services: - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} + - DEPLOYMENT_ENV=local volumes: - ./frontend:/app - /app/node_modules - # env_file: - # - ./frontend/.env + env_file: + - ./frontend/.env container_name: frontend ports: - "8080:8080" diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 2591ac47e..058f88a58 100644 --- 
a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -923,14 +923,14 @@ The API is used to drop and create the vector index when vector index dimesion a POST /retry_processing ---- -This API is used to reprocess cancelled, completed or failed file sources. -Users have 3 options to reprocess files: +This API is used to mark cancelled, completed or failed file sources as 'Ready to Reprocess'. +Users have 3 options for reprocessing files: * Start from begnning - In this condition file will be processed from the begnning i.e. 1st chunk again. * Delete entities and start from begnning - If the file source is already processed and have any existing nodes and relations then those will be deleted and file will be reprocessed from the 1st chunk. * Start from last processed postion - Cancelled or failed files will be processed from the last successfully processed chunk position. This option is not available for completed files. -Ones the status is set to 'Reprocess', user can again click on Generate graph to process the file for knowledge graph creation. +Once the status is set to 'Ready to Reprocess', the user can again click on Generate graph to process the file for knowledge graph creation. **API Parameters :** @@ -938,7 +938,7 @@ Ones the status is set to 'Reprocess', user can again click on Generate graph to * `userName`= Neo4j db username, * `password`= Neo4j db password, * `database`= Neo4j database name, -* `file_name`= Name of the file which user want to reprocess. +* `file_name`= Name of the file which the user wants to reprocess. * `retry_condition` = One of the above 3 conditions which is selected for reprocessing. @@ -947,7 +947,7 @@ Ones the status is set to 'Reprocess', user can again click on Generate graph to .... { "status": "Success", - "message": "Status set to Reprocess for filename : $filename" + "message": "Status set to Ready to Reprocess for filename : $filename" } .... 
@@ -979,3 +979,99 @@ The API responsible for a evaluating chatbot responses on the basis of different } } .... + +=== Evaluate response with ground truth +---- +POST /additional_metrics +---- + +The API is responsible for evaluating chatbot responses on the basis of different metrics such as context entity recall, semantic score, and rouge score. This requires additional ground truth to be supplied by the user. This utilises the RAGAS library to calculate these metrics. + +**API Parameters :** + +* `question`= User query for the chatbot +* `context`= context retrieved by retrieval mode used for answer generation +* `answer`= answer generated by chatbot +* `reference`= ground truth/ expected answer provided by user +* `model`= LLM model +* `mode`= Retrieval mode used for answer generation + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "graph_vector_fulltext": { + "rouge_score": 1.0, + "semantic_score": 0.9842, + "context_entity_recall_score": 0.5 + } + } +} +.... + +=== Fetch chunk text + +---- +POST /fetch_chunktext +---- + +The API is responsible for fetching the text associated with a particular chunk and the chunk metadata. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `document_name` = Name of the document for which chunks need to be fetched. +* `page no` = page number for multipage + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "pageitems": [ + { + "text": "By T. Albert Illustrated by: maaillustrations.com Science has never been so much fun. Here is all that a child needs to know about water, rain, hail, sleet and water cycle. When Professor Mois Ture teaches- little readers read, learn and ask for more….. Published by Monkey Pen Ltd Dear Supporter, Thank you for downloading our childrens books. 
Monkey Pens Vision is to provide thousands of free childrens books to young readers around the globe. Please share our books with your friends and family to support our mission. Thank you Please make a donation on Patreon to support Monkey Pens Free Book Project: Hi, I am Professor Mois Ture and I will be telling you about water. You can call it RAIN. You can call it SNOW. You can call it SLEET. You can call it HAIL. But it’s WATER all the same. Did you ever wonder how", + "position": 1, + "pagenumber": 1 + }, + { + "text": " it HAIL. But it’s WATER all the same. Did you ever wonder how old water is or where it comes from? The answers may surprise you. The next time you see a pond or even a glass of water, think about how old that water might be. Do you really want to know ? I thought you did. Did you brush your teeth this morning? Well, some of the water that you used could have fallen from the sky yesterday, or a week, or month ago. It’s pretty new. But, some part of that water is very old and was around during the time of the dinosaurs, or even longer. Or maybe it’s a little newer; like from the time when the Pharaohs were building pyramids. You see there is only a limited amount of water and it gets recycled. Yep! It keeps going round and round. We call it the “Water Cycle.” Yes – You", + "position": 2, + "pagenumber": 2 + } + ], + "total_pages": 1 + }, + "message": "Total elapsed API time 0.48" +} + +.... +=== Backend Database connection +---- +POST /backend_connection_configuation +---- + +The API responsible for create the connection obj from Neo4j DB based on environment variable and return the status for show/hide login dialog on UI + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": true, + "message": "Backend connection successful" +} +.... + +.... +{ + "status": "Failed", + "error": "Could not connect to Neo4j database. 
Please ensure that the username and password are correct", + "message": "Unable to connect backend DB" +} diff --git a/docs/frontend/frontend_docs.adoc b/docs/frontend/frontend_docs.adoc index 9eaf1e4bc..34e71f254 100644 --- a/docs/frontend/frontend_docs.adoc +++ b/docs/frontend/frontend_docs.adoc @@ -18,53 +18,80 @@ This document provides a comprehensive guide for developers on how we build a Re . ├── Components | ├─ ChatBot - | | ├─ ChatBotInfoModal + | | ├─ ChatInfoModal | | ├─ ChatModeToggle | | ├─ ExpandedChatButtonContainer + | | ├─ ChatModesSwitch + | | ├─ ChatOnlyComponent + | | ├─ ChatInfo + | | ├─ CommonChatActions + | | ├─ CommunitiesInfo + | | ├─ EntitiesInfo + | | ├─ MetricsCheckbox + | | ├─ MetricsTab + | | ├─ MultiModeMetrics + | | ├─ SourcesInfo | ├─ Data Sources | | ├─ AWS | | ├─ GCS | | ├─ Local - | | ├─ WebSources - | | | ├─Web - | | | ├─Wikipedia - | | | ├─Youtube + | | ├─ Web + | | | ├─ WebButton | ├─ Graph + | | ├─ CheckboxSelection + | | ├─ GraphPropertiesPanel + | | ├─ GraphPropertiesTable | | ├─ GraphViewButton | | ├─ GraphViewModal | | ├─ LegendsChip + | | ├─ ResizePanel + | | ├─ ResultOverview | ├─ Layout - | | ├─ Content + | | ├─ AlertIcon | | ├─ DrawerChatbot | | ├─ DrawerDropzone | | ├─ Header | | ├─ PageLayout | | ├─ SideNav | ├─ Popups + | | ├─ ChunkPopUp | | ├─ ConnectionModal | | ├─ DeletePopup | | ├─ GraphEnhancementDialog | | ├─ LargeFilePopup + | | ├─ RetryConfirmation | | ├─ Settings | ├─ UI | | ├─ Alert | | ├─ ButtonWithTooltip | | ├─ CustomButton - | | ├─ CustomModal + | | ├─ CustomCheckBox + | | ├─ CustomMenu | | ├─ CustomProgressBar - | | ├─ CustomSourceInput - | | ├─ Dropdown + | | ├─ DatabaseIcon + | | ├─ DatabaseStatusIcon | | ├─ ErrorBoundary - | | ├─ FileTable - | | ├─ GenericSourceButton - | | ├─ GenericSourceModal + | | ├─ FallBackDialog | | ├─ HoverableLink | | ├─ IconButtonTooltip | | ├─ Legend - | | ├─ Menu - | | ├─ QuickStarter + | | ├─ ScienceMolecule + | | ├─ ShowAll + | | ├─ TipWrapper + | ├─ Websources + | | 
├─ Web + | | ├─ Wikipedia + | | ├─ Youtube + | | ├─ CustomSourceInput + | | ├─ GenericSourceButton + | | ├─ GenericSourceModal + | ├─ Content + | ├─ Dropdown + | ├─ FileTable + | ├─ QuickStarter ├── HOC - | ├─ SettingModalHOC + | ├─ CustomModal + | ├─ withVisibility ├── Assets | ├─ images | | ├─ Application Images @@ -87,8 +114,14 @@ This document provides a comprehensive guide for developers on how we build a Re | ├─ constants | ├─ FileAPI | ├─ Loader - | ├─ Types + | ├─ Queue + | ├─ toats | ├─ utils + ├── App + ├── index + ├── main + ├── router + ├── types └── README.md == Application @@ -98,7 +131,10 @@ Added Node.js with version v21.1.0 and npm on the development machine. Install necessary dependencies by running yarn install, such as axios for making HTTP requests and others to interact with the graph. == 2. Connect to the Neo4j Aura instance: -Created a connection modal by adding details including protocol, URI, database name, username, and password. Added a submit button that triggers an API: ***/connect*** and accepts params like uri, password, username and database to establish a connection to the Neo4j Aura instance. Handled the authentication and error scenarios appropriately, by displaying relevant messages. To check whether the backend connection is up and working we hit the API: ***/health*** +Created a connection modal by adding details including protocol, URI, database name, username, and password. Added a submit button that triggers an API: ***/connect*** and accepts params like uri, password, username and database to establish a connection to the Neo4j Aura instance. Handled the authentication and error scenarios appropriately, by displaying relevant messages. To check whether the backend connection is up and working we hit the API: ***/health.*** The user can now access both AURA DS and AURA DB instances. 
+ +* If GDS Connection is there icon is scientific molecule > Graph enhancement model > Post processing jobs > gives user the leverage to check and uncheck the communities checkbox. +* If AURA DB > icon is database icon > Graph enhancement model > Post processing jobs > communities checkbox is disabled. * Before Connection : @@ -241,6 +277,10 @@ User can delete all number/selected files from the table. image::images/DeleteFiles.jpg[DeleteFiles, 600] +* ***Chat Only Mode*** + +User can also use the chat only feature by navigating to the url https://dev-frontend-dcavk67s4a-uc.a.run.app/chat-only to ask questions related to documents which have been completely processed. User is required to pass the login credentials to connect to the database. + == 8. Interface Design: Designed a user-friendly interface that guides users through the process of connecting to Neo4j Aura, accessing file sources, uploading PDF files, and generating graphs. diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 311294f4a..5cbc3d8de 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -28,13 +28,16 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \ VITE_CHAT_MODES=$VITE_CHAT_MODES \ VITE_BATCH_SIZE=$VITE_BATCH_SIZE \ + VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_LLM_MODELS_PROD=$VITE_LLM_MODELS_PROD \ yarn run build # Step 2: Serve the application using Nginx FROM nginx:alpine +ARG DEPLOYMENT_ENV="local" +ENV DEPLOYMENT_ENV=$DEPLOYMENT_ENV COPY --from=build /app/dist /usr/share/nginx/html -COPY nginx/nginx.conf /etc/nginx/conf.d/default.conf +COPY /nginx/nginx.${DEPLOYMENT_ENV}.conf /etc/nginx/templates/nginx.conf.template EXPOSE 8080 -CMD ["nginx", "-g", "daemon off;"] +CMD ["nginx", "-g", "daemon off;"] \ No newline at end of file diff --git a/frontend/README.md b/frontend/README.md index 11a830c70..6f5c0522a 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -1,6 +1,6 @@ # Neo4j Knowledge Graph Builder -Reactjs app for 
building an knowledge graph using [Neo4j Needle](https://www.neo4j.design/). +ReactJS app for building an knowledge graph using [Neo4j Needle](https://www.neo4j.design/). ## Features - 🚀 Responsive: Adapts to different screen sizes for optimal user experience. @@ -24,5 +24,5 @@ Do run yarn add -- package name to get project updated with required dependencie 29/01/2024> Latest dependency yarn add uuid ## -Upload api url should be picked from: ports tab under codespace environement // For demo +Upload api url should be picked from: ports tab under codespace environment // For demo ## What it looks like diff --git a/frontend/example.env b/frontend/example.env index 4063fbc37..901ac6017 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -10,3 +10,5 @@ VITE_GOOGLE_CLIENT_ID="" VITE_CHAT_MODES="" VITE_BATCH_SIZE=2 VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" +VITE_FRONTEND_HOSTNAME="localhost:8080" +VITE_SEGMENT_API_URL="" diff --git a/frontend/nginx/nginx.conf b/frontend/nginx/nginx.local.conf similarity index 94% rename from frontend/nginx/nginx.conf rename to frontend/nginx/nginx.local.conf index d31c0348b..2bab3515f 100644 --- a/frontend/nginx/nginx.conf +++ b/frontend/nginx/nginx.local.conf @@ -1,16 +1,16 @@ -server { - - listen 8080; - - location / { - root /usr/share/nginx/html; - index index.html index.htm; - try_files $uri $uri/ /index.html; - } - - error_page 401 403 404 index.html; - - location /public { - root /usr/local/var/www; - } +server { + + listen 8080; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + error_page 401 403 404 index.html; + + location /public { + root /usr/local/var/www; + } } \ No newline at end of file diff --git a/frontend/nginx/nginx.prod.conf b/frontend/nginx/nginx.prod.conf new file mode 100644 index 000000000..d9c369326 --- /dev/null +++ b/frontend/nginx/nginx.prod.conf @@ -0,0 +1,22 @@ +server { + listen 8080; + 
add_header X-Frame-Options "DENY"; + add_header X-Content-Type-Options "nosniff"; + add_header Content-Security-Policy "connect-src 'self' ${VITE_BACKEND_API_URL} ${VITE_SEGMENT_API_URL}; + frame-src 'self' *.youtube.com *.wikipedia.org; + script-src 'self' 'unsafe-inline' https://accounts.google.com/gsi/client; + default-src 'self' *.${VITE_FRONTEND_HOSTNAME} data:; + style-src 'self' *.googleapis.com 'unsafe-inline';" always ; + gzip on; + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + error_page 401 403 404 index.html; + + location /public { + root /usr/local/var/www; + } +} \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json index 9e51f89fa..846621f49 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -15,11 +15,12 @@ "@mui/material": "^5.15.10", "@mui/styled-engine": "^5.15.9", "@neo4j-devtools/word-color": "^0.0.8", - "@neo4j-ndl/base": "^2.12.7", - "@neo4j-ndl/react": "^2.16.9", - "@neo4j-nvl/base": "^0.3.3", - "@neo4j-nvl/react": "^0.3.3", + "@neo4j-ndl/base": "^3.0.16", + "@neo4j-ndl/react": "^3.0.30", + "@neo4j-nvl/base": "^0.3.6", + "@neo4j-nvl/react": "^0.3.6", "@react-oauth/google": "^0.12.1", + "@tanstack/react-table": "^8.20.5", "@types/uuid": "^9.0.7", "axios": "^1.6.5", "clsx": "^2.1.1", diff --git a/frontend/src/App.css b/frontend/src/App.css index e912a05e2..93eab4ae5 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -25,7 +25,7 @@ } .contentWithExpansion { - width: calc(-840px + 100dvw); + width: calc(-807px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; @@ -58,7 +58,7 @@ } .contentWithChatBot { - width: calc(-550px + 100dvw); + width: calc(-512px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; @@ -386,4 +386,13 @@ .custom-menu { min-width: 250px; max-width: 305px; -} \ No newline at end of file +} +.ndl-modal-root{ + z-index: 39 !important; +} +.tbody-dark 
.ndl-data-grid-tr:hover { + --cell-background: rgb(60 63 68) !important; +} +.tbody-light .ndl-data-grid-tr:hover { + --cell-background: rgb(226 227 229) !important; +} diff --git a/frontend/src/HOC/CustomModal.tsx b/frontend/src/HOC/CustomModal.tsx index e756a092d..814a4343c 100644 --- a/frontend/src/HOC/CustomModal.tsx +++ b/frontend/src/HOC/CustomModal.tsx @@ -16,7 +16,7 @@ const CustomModal: React.FC = ({ return ( = ({ {status !== 'unknown' && ( setStatus('unknown')} type={status} name='Custom Banner' + usage='inline' /> )}
{children}
- diff --git a/frontend/src/HOC/withVisibility.tsx b/frontend/src/HOC/withVisibility.tsx new file mode 100644 index 000000000..057c38bd0 --- /dev/null +++ b/frontend/src/HOC/withVisibility.tsx @@ -0,0 +1,14 @@ +interface VisibilityProps { + isVisible: boolean; +} +export function withVisibility

(WrappedComponent: React.ComponentType

) { + const VisibityControlled = (props: P & VisibilityProps) => { + if (props.isVisible === false) { + return null; + } + + return ; + }; + + return VisibityControlled; +} diff --git a/frontend/src/assets/images/chunks.svg b/frontend/src/assets/images/chunks.svg new file mode 100644 index 000000000..e1aa08f32 --- /dev/null +++ b/frontend/src/assets/images/chunks.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/components/BreakDownPopOver.tsx b/frontend/src/components/BreakDownPopOver.tsx new file mode 100644 index 000000000..f6798bf97 --- /dev/null +++ b/frontend/src/components/BreakDownPopOver.tsx @@ -0,0 +1,33 @@ +import CustomPopOver from './UI/CustomPopOver'; +import { IconButton } from '@neo4j-ndl/react'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import { CustomFileBase } from '../types'; +import { useCredentials } from '../context/UserCredentials'; + +export default function BreakDownPopOver({ file, isNodeCount = true }: { file: CustomFileBase; isNodeCount: boolean }) { + const { isGdsActive } = useCredentials(); + + return ( + + + + } + > + {isNodeCount ? ( +

    +
  • Chunk Nodes: {file.chunkNodeCount}
  • +
  • Entity Nodes: {file.entityNodeCount}
  • + {isGdsActive &&
  • Community Nodes: {file.communityNodeCount}
  • } +
+ ) : ( +
    +
  • Chunk Relations: {file.chunkRelCount}
  • +
  • Entity Relations: {file.entityEntityRelCount}
  • + {isGdsActive &&
  • Community Relations: {file.communityRelCount}
  • } +
+ )} + + ); +} diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index b3c47b4be..68c07227d 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -3,34 +3,42 @@ import { Typography, Flex, Tabs, - CypherCodeBlock, - CypherCodeBlockProps, + Code, useCopyToClipboard, Banner, useMediaQuery, Button, + TextArea, + IconButton, } from '@neo4j-ndl/react'; import { DocumentDuplicateIconOutline, ClipboardDocumentCheckIconOutline } from '@neo4j-ndl/react/icons'; import '../../styling/info.css'; import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; import { ExtendedNode, UserCredentials, chatInfoMessage } from '../../types'; -import { useContext, useEffect, useMemo, useState } from 'react'; +import { useEffect, useMemo, useReducer, useRef, useState } from 'react'; import GraphViewButton from '../Graph/GraphViewButton'; import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo'; import { useCredentials } from '../../context/UserCredentials'; -import { ThemeWrapperContext } from '../../context/ThemeWrapper'; import { tokens } from '@neo4j-ndl/base'; import ChunkInfo from './ChunkInfo'; import EntitiesInfo from './EntitiesInfo'; import SourcesInfo from './SourcesInfo'; import CommunitiesInfo from './CommunitiesInfo'; -import { chatModeLables, chatModeReadableLables, supportedLLmsForRagas } from '../../utils/Constants'; +import { + chatModeLables, + chatModeReadableLables, + mergeNestedObjects, + supportedLLmsForRagas, +} from '../../utils/Constants'; import { Relationship } from '@neo4j-nvl/base'; import { getChatMetrics } from '../../services/GetRagasMetric'; import MetricsTab from './MetricsTab'; import { Stack } from '@mui/material'; import { capitalizeWithUnderscore, getNodes } from '../../utils/Utils'; import MultiModeMetrics from './MultiModeMetrics'; +import getAdditionalMetrics from 
'../../services/AdditionalMetrics'; +import { withVisibility } from '../../HOC/withVisibility'; +import MetricsCheckbox from './MetricsCheckbox'; const ChatInfoModal: React.FC = ({ sources, @@ -80,24 +88,39 @@ const ChatInfoModal: React.FC = ({ : 3 ); const { userCredentials } = useCredentials(); - const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)); const [multiModeError, setMultiModeError] = useState(''); + const [enableReference, toggleReferenceVisibility] = useReducer((state: boolean) => !state, false); + const textAreaRef = useRef(null); + const [isAdditionalMetricsEnabled, setIsAdditionalMetricsEnabled] = useState( + multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 3 + ? true + : multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 3 + ? false + : null + ); + const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState( + metricDetails != undefined && Object.keys(metricDetails).length > 2 + ? true + : metricDetails != undefined && Object.keys(metricDetails).length <= 2 + ? false + : null + ); - const actions: CypherCodeBlockProps['actions'] = useMemo( + const actions: React.ComponentProps>[] = useMemo( () => [ { title: 'copy', - 'aria-label': 'copy', + ariaLabel: 'copy', children: ( <> {copiedText ? 
( ) : ( - + )} ), @@ -183,79 +206,129 @@ const ChatInfoModal: React.FC = ({ setActiveTab(tabId); }; const loadMetrics = async () => { - if (activeChatmodes) { - if (Object.keys(activeChatmodes).length <= 1) { - setShowMetricsTable(true); - const [defaultMode] = Object.keys(activeChatmodes); - try { - toggleMetricsLoading(); - const response = await getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [ - defaultMode, - ]); - toggleMetricsLoading(); - if (response.data.status === 'Success') { - const data = response; - saveMetrics(data.data.data[defaultMode]); - } else { - throw new Error(response.data.error); - } - } catch (error) { - if (error instanceof Error) { - toggleMetricsLoading(); - console.log('Error in getting chat metrics', error); - saveMetrics({ faithfulness: 0, answer_relevancy: 0, error: error.message }); + // @ts-ignore + const referenceText = textAreaRef?.current?.value ?? ''; + const metricsPromise = []; + if (activeChatmodes != undefined && Object.keys(activeChatmodes).length <= 1) { + setShowMetricsTable(true); + const [defaultMode] = Object.keys(activeChatmodes); + try { + toggleMetricsLoading(); + metricsPromise.push( + getChatMetrics(metricquestion, [metriccontexts], [metricanswer], metricmodel, [defaultMode]) + ); + if (referenceText.trim() != '') { + metricsPromise.push( + getAdditionalMetrics(metricquestion, [metriccontexts], [metricanswer], referenceText, metricmodel, [ + defaultMode, + ]) + ); + toggleReferenceVisibility(); + } + + const metricsResponse = await Promise.allSettled(metricsPromise); + const successresponse = []; + for (let index = 0; index < metricsResponse.length; index++) { + const metricPromise = metricsResponse[index]; + if (metricPromise.status === 'fulfilled' && metricPromise.value.data.status === 'Success') { + successresponse.push(metricPromise.value.data.data); } } - } else { - setShowMultiModeMetrics(true); + setIsAdditionalMetricsWithSingleMode(successresponse.length === 2); 
toggleMetricsLoading(); - const contextarray = Object.values(activeChatmodes).map((r) => { - return r.metric_contexts; - }); - const answerarray = Object.values(activeChatmodes).map((r) => { - return r.metric_answer; - }); - const modesarray = Object.keys(activeChatmodes).map((mode) => { - return mode; - }); - try { - const responses = await getChatMetrics( - metricquestion, - contextarray as string[], - answerarray as string[], - metricmodel, - modesarray - ); - toggleMetricsLoading(); - if (responses.data.status === 'Success') { - const modewisedata = responses.data.data; - const metricsdata = Object.entries(modewisedata).map(([mode, scores]) => { - return { mode, answer_relevancy: scores.answer_relevancy, faithfulness: scores.faithfulness }; - }); - saveMultimodemetrics(metricsdata); + const mergedState = successresponse.reduce((acc, cur) => { + if (acc[defaultMode]) { + acc[defaultMode] = { ...acc[defaultMode], ...cur[defaultMode] }; } else { - throw new Error(responses.data.error); + acc[defaultMode] = cur[defaultMode]; } - } catch (error) { + return acc; + }, {}); + saveMetrics(mergedState[defaultMode]); + } catch (error) { + if (error instanceof Error) { + setShowMetricsTable(false); toggleMetricsLoading(); console.log('Error in getting chat metrics', error); - if (error instanceof Error) { - setMultiModeError(error.message); + saveMetrics({ faithfulness: 0, answer_relevancy: 0, error: error.message }); + } + } + } else if (activeChatmodes != undefined) { + setShowMultiModeMetrics(true); + toggleMetricsLoading(); + const values = Object.values(activeChatmodes); + const keys = Object.keys(activeChatmodes); + const contextarray = values.map((r) => { + return r.metric_contexts; + }); + const answerarray = values.map((r) => { + return r.metric_answer; + }); + const modesarray = keys.map((mode) => { + return mode; + }); + try { + metricsPromise.push( + getChatMetrics(metricquestion, contextarray as string[], answerarray as string[], metricmodel, modesarray) + ); 
+ if (referenceText.trim() != '') { + metricsPromise.push( + getAdditionalMetrics( + metricquestion, + contextarray as string[], + answerarray as string[], + referenceText, + metricmodel, + modesarray + ) + ); + toggleReferenceVisibility(); + } + const metricsResponse = await Promise.allSettled(metricsPromise); + toggleMetricsLoading(); + const successResponse = []; + for (let index = 0; index < metricsResponse.length; index++) { + const metricPromise = metricsResponse[index]; + if (metricPromise.status === 'fulfilled' && metricPromise.value.data.status === 'Success') { + successResponse.push(metricPromise.value.data.data); } } + setIsAdditionalMetricsEnabled(successResponse.length === 2); + const metricsdata = Object.entries(mergeNestedObjects(successResponse)).map(([mode, scores]) => { + return { mode, ...scores }; + }); + saveMultimodemetrics(metricsdata); + } catch (error) { + setShowMultiModeMetrics(false); + toggleMetricsLoading(); + console.log('Error in getting chat metrics', error); + if (error instanceof Error) { + setMultiModeError(error.message); + } } } }; - + const MetricsCheckBoxWithCheck = withVisibility(MetricsCheckbox); + const TextareaWithCheck = withVisibility(() => ( + + )); + const isMultiModes = useMemo( + () => activeChatmodes != null && Object.keys(activeChatmodes).length > 1, + [activeChatmodes] + ); + const isSingleMode = useMemo( + () => activeChatmodes != null && Object.keys(activeChatmodes).length <= 1, + [activeChatmodes] + ); return ( - - +
+
- +
Retrieval information To generate this response, the process took {response_time} seconds, @@ -268,10 +341,12 @@ const ChatInfoModal: React.FC = ({ {' '} mode. - - +
+
{error?.length > 0 ? ( - {error} + + {error} + ) : ( {mode === chatModeLables['global search+vector+fulltext'] ? ( @@ -325,6 +400,7 @@ const ChatInfoModal: React.FC = ({ . } + usage='inline' > )} @@ -335,41 +411,63 @@ const ChatInfoModal: React.FC = ({ about 20 seconds . You'll see detailed scores shortly. - - - Faithfulness: Determines How accurately the answer reflects the - provided information - - - Answer Relevancy: Determines How well the answer addresses the - user's question. - - - {showMultiModeMetrics && activeChatmodes != null && Object.keys(activeChatmodes).length > 1 && ( + {showMultiModeMetrics && isMultiModes && ( )} - {showMetricsTable && activeChatmodes != null && Object.keys(activeChatmodes).length <= 1 && ( + {showMetricsTable && isSingleMode && ( )} - {!metricDetails && activeChatmodes != undefined && Object.keys(activeChatmodes).length <= 1 && ( - - )} - {!multiModelMetrics.length && activeChatmodes != undefined && Object.keys(activeChatmodes).length > 1 && ( + + + + + {isSingleMode && + (isAdditionalMetricsWithSingleMode === false || isAdditionalMetricsWithSingleMode === null) && ( + + )} + {isMultiModes && (isAdditionalMetricsEnabled === false || isAdditionalMetricsEnabled === null) && (
); }; export default ChatInfoModal; diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 0a9a81e10..a35674348 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -1,33 +1,36 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; -import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; -import CustomMenu from '../UI/Menu'; +import CustomMenu from '../UI/CustomMenu'; import { chatModeLables, chatModes as AvailableModes, chatModeReadableLables } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; +import { useMemo } from 'react'; export default function ChatModeToggle({ menuAnchor, closeHandler = () => {}, open, - anchorPortal = true, - disableBackdrop = false, + isRoot, }: { - menuAnchor: HTMLElement | null; - closeHandler?: () => void; + menuAnchor: React.RefObject; + closeHandler?: ( + event: Event | undefined, + closeReason: { + type: 'backdropClick' | 'itemClick' | 'escapeKeyDown'; + id?: string; + } + ) => void; open: boolean; - anchorPortal?: boolean; - disableBackdrop?: boolean; + isRoot: boolean; }) { const { setchatModes, chatModes, postProcessingTasks } = useFileContext(); const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); - useEffect(() => { - if (!chatModes.length) { - setchatModes([chatModeLables['graph+vector+fulltext']]); - } - }, [chatModes.length]); + if (!chatModes.length) { + setchatModes([chatModeLables['graph+vector+fulltext']]); + } + const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? 
AvailableModes @@ -44,7 +47,6 @@ export default function ChatModeToggle({ } else { setchatModes((prev) => [...prev, m.mode]); } - closeHandler(); }; return { title: ( @@ -59,7 +61,10 @@ export default function ChatModeToggle({
), - onClick: handleModeChange, + onClick: (e: React.MouseEvent) => { + handleModeChange(); + e.stopPropagation(); + }, disabledCondition: false, description: ( @@ -72,15 +77,8 @@ export default function ChatModeToggle({ ), }; }); - }, [chatModes, memoizedChatModes, closeHandler]); + }, [chatModes, memoizedChatModes]); return ( - + ); } diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index 4ace49af5..5ff01a919 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -23,29 +23,29 @@ export default function ChatModesSwitch({ return ( switchToOtherMode(currentModeIndex - 1)} - aria-label='left' + ariaLabel='left' > - + - {chatmodetoshow} - + switchToOtherMode(currentModeIndex + 1)} - aria-label='right' + ariaLabel='right' > - + ); diff --git a/frontend/src/components/ChatBot/ChatOnlyComponent.tsx b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx new file mode 100644 index 000000000..6d1e94b59 --- /dev/null +++ b/frontend/src/components/ChatBot/ChatOnlyComponent.tsx @@ -0,0 +1,158 @@ +import { useEffect, useState, useCallback, useReducer } from 'react'; +import { useLocation } from 'react-router'; +import { MessageContextWrapper, useMessageContext } from '../../context/UserMessages'; +import UserCredentialsWrapper, { useCredentials } from '../../context/UserCredentials'; +import { FileContextProvider } from '../../context/UsersFiles'; +import Chatbot from './Chatbot'; +import ConnectionModal from '../Popups/ConnectionModal/ConnectionModal'; +import Header from '../Layout/Header'; +import { clearChatAPI } from '../../services/QnaAPI'; +import { ChatProps, connectionState, Messages, UserCredentials } from '../../types'; +import { getIsLoading } from '../../utils/Utils'; +import ThemeWrapper from '../../context/ThemeWrapper'; + +const ChatContent: React.FC = ({ chatMessages }) => { + const { clearHistoryData, messages, 
setMessages, setClearHistoryData } = useMessageContext(); + const { setUserCredentials, setConnectionStatus, connectionStatus, setShowDisconnectButton } = useCredentials(); + const [showBackButton, setShowBackButton] = useReducer((state) => !state, false); + const [openConnection, setOpenConnection] = useState({ + openPopUp: false, + chunksExists: false, + vectorIndexMisMatch: false, + chunksExistsWithDifferentDimension: false, + }); + /** + * Initializes connection settings based on URL parameters. + */ + const initialiseConnection = useCallback(() => { + const urlParams = new URLSearchParams(window.location.search); + const uri = urlParams.get('uri'); + const user = urlParams.get('user'); + const encodedPassword = urlParams.get('password'); + const database = urlParams.get('database'); + const port = urlParams.get('port'); + const openModal = urlParams.get('open') === 'true'; + if (openModal || !(uri && user && encodedPassword && database && port)) { + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + } else { + const credentialsForAPI: UserCredentials = { + uri, + userName: user, + password: atob(atob(encodedPassword)), + database, + port, + }; + setShowBackButton(); + setUserCredentials(credentialsForAPI); + setConnectionStatus(true); + setMessages(chatMessages); + // Remove query params from URL + window.history.replaceState({}, document.title, window.location.pathname); + } + }, [chatMessages, setUserCredentials, setConnectionStatus, setMessages]); + + useEffect(() => { + initialiseConnection(); + }, [initialiseConnection]); + /** + * Handles successful connection establishment. 
+ */ + const handleConnectionSuccess = () => { + setConnectionStatus(true); + setShowDisconnectButton(true); + setOpenConnection((prev) => ({ ...prev, openPopUp: false })); + const urlParams = new URLSearchParams(window.location.search); + urlParams.delete('openModal'); + window.history.replaceState({}, document.title, `${window.location.pathname}?${urlParams.toString()}`); + }; + /** + * Clears chat history by calling the API. + */ + const deleteOnClick = async () => { + try { + setClearHistoryData(true); + const credentials = JSON.parse(localStorage.getItem('neo4j.connection') || '{}') as UserCredentials; + const sessionId = sessionStorage.getItem('session_id') || ''; + const response = await clearChatAPI(credentials, sessionId); + if (response.data.status !== 'Success') { + setClearHistoryData(false); + } + } catch (error) { + console.error('Error clearing chat history:', error); + setClearHistoryData(false); + } + }; + useEffect(() => { + if (clearHistoryData) { + const currentDateTime = new Date(); + setMessages([ + { + datetime: `${currentDateTime.toLocaleDateString()} ${currentDateTime.toLocaleTimeString()}`, + id: 2, + modes: { + 'graph+vector+fulltext': { + message: + 'Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.', + }, + }, + user: 'chatbot', + currentMode: 'graph+vector+fulltext', + }, + ]); + setClearHistoryData(false); + } + }, [clearHistoryData, setMessages]); + return ( + <> + +
+
+
+ +
+
+ + ); +}; +/** + * ChatOnlyComponent + * Wrapper component to provide necessary context and initialize chat functionality. + */ +const ChatOnlyComponent: React.FC = () => { + const location = useLocation(); + const chatMessages = (location.state?.messages as Messages[]) || []; + return ( + + + + + + + + + + ); +}; +export default ChatOnlyComponent; diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index 3fa666e1e..aa84764bc 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -42,6 +42,13 @@ import { downloadClickHandler, getDateTime } from '../../utils/Utils'; import ChatModesSwitch from './ChatModesSwitch'; import CommonActions from './CommonChatActions'; const InfoModal = lazy(() => import('./ChatInfoModal')); +if (typeof window !== 'undefined') { + if (!sessionStorage.getItem('session_id')) { + const id = uuidv4(); + sessionStorage.setItem('session_id', id); + } +} +const sessionId = sessionStorage.getItem('session_id') ?? ''; const Chatbot: FC = (props) => { const { @@ -49,15 +56,14 @@ const Chatbot: FC = (props) => { setMessages: setListMessages, isLoading, isFullScreen, - clear, connectionStatus, + isChatOnly, } = props; const [inputMessage, setInputMessage] = useState(''); const [loading, setLoading] = useState(isLoading); const { userCredentials } = useCredentials(); const { model, chatModes, selectedRows, filesData } = useFileContext(); const messagesEndRef = useRef(null); - const [sessionId, setSessionId] = useState(sessionStorage.getItem('session_id') ?? 
''); const [showInfoModal, setShowInfoModal] = useState(false); const [sourcesModal, setSourcesModal] = useState([]); const [modelModal, setModelModal] = useState(''); @@ -123,13 +129,6 @@ const Chatbot: FC = (props) => { const saveCommunities = (chatCommunities: Community[]) => { setCommunities(chatCommunities); }; - useEffect(() => { - if (!sessionStorage.getItem('session_id')) { - const id = uuidv4(); - setSessionId(id); - sessionStorage.setItem('session_id', id); - } - }, []); const simulateTypingEffect = (messageId: number, response: ResponseMode, mode: string, message: string) => { let index = 0; @@ -319,19 +318,9 @@ const Chatbot: FC = (props) => { }; useEffect(() => { scrollToBottom(); - }, [listMessages]); - - useEffect(() => { setLoading(() => listMessages.some((msg) => msg.isLoading || msg.isTyping)); }, [listMessages]); - useEffect(() => { - if (clear) { - cancel(); - setListMessages((msgs) => msgs.map((msg) => ({ ...msg, speaking: false }))); - } - }, [clear]); - const handleCopy = (message: string, id: number) => { copy(message); setListMessages((msgs) => @@ -419,8 +408,12 @@ const Chatbot: FC = (props) => { }, []); return ( -
-
+
+
{listMessages.map((chat, index) => { @@ -440,10 +433,10 @@ const Chatbot: FC = (props) => { className='-ml-4' hasStatus name='KM' - shape='square' size='x-large' source={ChatBotAvatar} status={connectionStatus ? 'online' : 'offline'} + shape='square' type='image' /> ) : ( @@ -451,9 +444,9 @@ const Chatbot: FC = (props) => { className='' hasStatus name='KM' - shape='square' size='x-large' status={connectionStatus ? 'online' : 'offline'} + shape='square' type='image' /> )} @@ -555,12 +548,14 @@ const Chatbot: FC = (props) => { className={`n-bg-palette-neutral-bg-default flex-grow-7 ${ isFullScreen ? 'w-[calc(100%-105px)]' : 'w-[70%]' }`} - aria-label='chatbot-input' - type='text' value={inputMessage} - fluid + isFluid onChange={handleInputChange} - name='chatbot-input' + htmlAttributes={{ + type: 'text', + 'aria-label': 'chatbot-input', + name: 'chatbot-input', + }} /> = (props) => { className: 'n-p-token-4 n-bg-palette-neutral-bg-weak n-rounded-lg', }} onClose={() => setShowInfoModal(false)} - open={showInfoModal} + isOpen={showInfoModal} size={activeChat?.currentMode === chatModeLables['entity search+vector'] ? 'large' : 'medium'} >
{ downloadClickHandler( { @@ -603,25 +601,28 @@ const Chatbot: FC = (props) => { nodes, tokensUsed, model, + multiModelMetrics, }, downloadLinkRef, 'graph-builder-chat-details.json' ); }} > - + "" setShowInfoModal(false)} > - +
= ({ loading, chunks, mode }) => { return ( <> {loading ? ( - +
- +
) : chunks?.length > 0 ? (
    @@ -71,9 +71,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -83,7 +84,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { <>
- + = ({ loading, chunks, mode }) => { handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -124,8 +126,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -147,8 +150,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -170,8 +174,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -185,7 +190,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { <>
- + {chunk?.url}
@@ -197,8 +202,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} @@ -229,8 +235,9 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} + htmlAttributes={{ + onClick: () => handleChunkClick(chunk.element_id, 'Chunk'), + }} > {'View Graph'} diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 3c6899c5b..d998ffffc 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -1,4 +1,4 @@ -import { Box, LoadingSpinner, Flex, Typography, TextLink } from '@neo4j-ndl/react'; +import { LoadingSpinner, Flex, Typography, TextLink } from '@neo4j-ndl/react'; import { FC, useState } from 'react'; import ReactMarkdown from 'react-markdown'; import { CommunitiesProps, UserCredentials } from '../../types'; @@ -31,9 +31,9 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = return ( <> {loading ? ( - +
- +
) : communities?.length > 0 ? (
    @@ -43,8 +43,9 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = handleCommunityClick(community.element_id, 'chatInfoView')} + htmlAttributes={{ + onClick: () => handleCommunityClick(community.element_id, 'chatInfoView'), + }} >{`ID : ${community.id}`} {mode === chatModeLables['global search+vector+fulltext'] && community.score && ( @@ -60,7 +61,10 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) =
) : ( - No Communities Found + + {' '} + No Communities Found + )} {openGraphView && ( = ({ loading, mode, graphonly_entities, in return ( <> {loading ? ( - +
- +
) : (mode !== 'graph' && Object.keys(groupedEntities)?.length > 0) || (mode == 'graph' && Object.keys(graphonly_entities)?.length > 0) ? (
    @@ -76,7 +76,7 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in
      {Object.keys(label).map((key) => (
    • - + {key} = ({ loading, mode, graphonly_entities, in key={index} className='flex items-center mb-2 text-ellipsis whitespace-nowrap max-w-[100%)] overflow-hidden' > - + {label === '__Community__' ? graphLabels.community : label} ({labelCounts[label]}) = ({ loading, mode, graphonly_entities, in return ( handleEntityClick(textId!, 'chatInfoView')} + htmlAttributes={{ + onClick: () => handleEntityClick(textId!, 'chatInfoView'), + }} className={loadingGraphView ? 'cursor-wait' : 'cursor-pointer'} > {text} @@ -132,7 +134,9 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in })}
    ) : ( - No Entities Found + + No Entities Found + )} {openGraphView && ( = ({ closeChatBot, deleteOnClick, messages }) => { - const [chatAnchor, setchatAnchor] = useState(null); + const chatAnchor = useRef(null); const [showChatModeOption, setshowChatModeOption] = useState(false); return (
    setshowChatModeOption(false)} - anchorPortal={true} - disableBackdrop={true} + closeHandler={(_, reason) => { + if (reason.type === 'backdropClick') { + setshowChatModeOption(false); + } + }} open={showChatModeOption} menuAnchor={chatAnchor} + isRoot={false} /> - - { - setchatAnchor(e.currentTarget); - setshowChatModeOption(true); - }} - clean - text='Chat mode' - placement='bottom' - label='Chat mode' - > - - +
    +
    + { + setshowChatModeOption(true); + }} + clean + text='Chat mode' + placement='bottom' + label='Chat mode' + > + + +
    - - + + - +
    ); }; diff --git a/frontend/src/components/ChatBot/MetricsCheckbox.tsx b/frontend/src/components/ChatBot/MetricsCheckbox.tsx new file mode 100644 index 000000000..5b7fe89c0 --- /dev/null +++ b/frontend/src/components/ChatBot/MetricsCheckbox.tsx @@ -0,0 +1,21 @@ +import { Checkbox } from '@neo4j-ndl/react'; + +function MetricsCheckbox({ + enableReference, + toggleReferenceVisibility, + isDisabled = false, +}: { + enableReference: boolean; + toggleReferenceVisibility: React.DispatchWithoutAction; + isDisabled?: boolean; +}) { + return ( + + ); +} +export default MetricsCheckbox; diff --git a/frontend/src/components/ChatBot/MetricsTab.tsx b/frontend/src/components/ChatBot/MetricsTab.tsx index 55d37db4c..b39292d94 100644 --- a/frontend/src/components/ChatBot/MetricsTab.tsx +++ b/frontend/src/components/ChatBot/MetricsTab.tsx @@ -1,5 +1,5 @@ -import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; -import { memo, useMemo, useRef } from 'react'; +import { Banner, Box, DataGrid, DataGridComponents, Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; +import { memo, useContext, useMemo, useRef } from 'react'; import { useReactTable, getCoreRowModel, @@ -9,6 +9,10 @@ import { getSortedRowModel, } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import { metricsinfo } from '../../utils/Constants'; +import NotAvailableMetric from './NotAvailableMetric'; function MetricsTab({ metricsLoading, metricDetails, @@ -17,14 +21,14 @@ function MetricsTab({ metricsLoading: boolean; metricDetails: | { - faithfulness: number; - answer_relevancy: number; + [key: string]: number | string; } | undefined; error: string; }) { - const columnHelper = createColumnHelper<{ metric: string; score: number }>(); + const columnHelper = createColumnHelper<{ metric: string; score: number 
| string }>(); const tableRef = useRef(null); + const { colorMode } = useContext(ThemeWrapperContext); const columns = useMemo( () => [ @@ -39,18 +43,34 @@ function MetricsTab({ .join(' ') : capitalize(metric); return ( -
    - {capitilizedMetric} -
    + +
    + {capitilizedMetric} +
    + + + + + + + + {metricsinfo[metric]} + + +
    ); }, header: () => Metric, footer: (info) => info.column.id, }), - columnHelper.accessor((row) => row.score, { + columnHelper.accessor((row) => row.score as number, { id: 'Score', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, }), ], @@ -77,7 +97,9 @@ function MetricsTab({ return ( {error != undefined && error?.trim() != '' ? ( - {error} + + {error} + ) : ( , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( )} diff --git a/frontend/src/components/ChatBot/MultiModeMetrics.tsx b/frontend/src/components/ChatBot/MultiModeMetrics.tsx index 4cf4aefd6..8bd89f1d8 100644 --- a/frontend/src/components/ChatBot/MultiModeMetrics.tsx +++ b/frontend/src/components/ChatBot/MultiModeMetrics.tsx @@ -4,24 +4,28 @@ import { createColumnHelper, getFilteredRowModel, getPaginationRowModel, - getSortedRowModel, } from '@tanstack/react-table'; import { capitalize } from '../../utils/Utils'; -import { useMemo, useRef } from 'react'; -import { Banner, Box, DataGrid, DataGridComponents, Typography } from '@neo4j-ndl/react'; +import { useContext, useEffect, useMemo, useRef } from 'react'; +import { Banner, Box, DataGrid, DataGridComponents, Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; import { multimodelmetric } from '../../types'; +import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import NotAvailableMetric from './NotAvailableMetric'; export default function MultiModeMetrics({ data, metricsLoading, error, + isWithAdditionalMetrics, }: { data: multimodelmetric[]; metricsLoading: boolean; error: string; + isWithAdditionalMetrics: boolean | null; }) { + const { colorMode } = useContext(ThemeWrapperContext); const tableRef = useRef(null); - const columnHelper = createColumnHelper(); 
const columns = useMemo( () => [ @@ -44,19 +48,140 @@ export default function MultiModeMetrics({ header: () => Mode, footer: (info) => info.column.id, }), - columnHelper.accessor((row) => row.answer_relevancy, { + columnHelper.accessor((row) => row.answer_relevancy as number, { id: 'Answer Relevancy', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Relevancy + + + + + + + + + Determines How well the answer addresses the user's question. + + + + + ), + }), + columnHelper.accessor((row) => row.faithfulness as number, { + id: 'Faithfullness', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, - header: () => Answer Relevancy, + header: () => ( + + Faithful + + + + + + + + + Determines How accurately the answer reflects the provided information. + + + + + ), }), - columnHelper.accessor((row) => row.faithfulness, { - id: 'Score', + columnHelper.accessor((row) => row.context_entity_recall_score as number, { + id: 'Entity Recall Score', cell: (info) => { - return {info.getValue().toFixed(2)}; + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; }, - header: () => Faithfulness, + header: () => ( + + Context + + + + + + + + + Determines the recall of entities present in both reference and retrieved contexts. + + + + + ), + }), + columnHelper.accessor((row) => row.semantic_score as number, { + id: 'Semantic Score', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Semantic + + + + + + + + + Determines How well the generated answer understands the meaning of the reference answer. 
+ + + + + ), + }), + columnHelper.accessor((row) => row.rouge_score as number, { + id: 'Rouge Score', + cell: (info) => { + const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2); + if (value === 'N.A') { + return ; + } + return {value}; + }, + header: () => ( + + Rouge + + + + + + + + + Determines How much the generated answer matches the reference answer, word-for-word. + + + + + ), }), ], [] @@ -69,47 +194,68 @@ export default function MultiModeMetrics({ getPaginationRowModel: getPaginationRowModel(), enableGlobalFilter: false, autoResetPageIndex: false, + enableColumnResizing: true, enableRowSelection: true, enableMultiRowSelection: true, - enableSorting: true, - getSortedRowModel: getSortedRowModel(), + enableSorting: false, }); + useEffect(() => { + if (isWithAdditionalMetrics === false) { + table.setColumnVisibility({ 'Recall Score': false, 'Semantic Score': false, 'Rouge Score': false }); + } else { + table.resetColumnVisibility(true); + } + }, [isWithAdditionalMetrics, table]); + return ( {error?.trim() != '' ? ( - {error} + + {error} + ) : ( - , - PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { - return ( - + ( + - ); - }, - }} - /> + ), + PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { + return ( + + ); + }, + }} + isKeyboardNavigable={false} + /> +
)} ); diff --git a/frontend/src/components/ChatBot/NotAvailableMetric.tsx b/frontend/src/components/ChatBot/NotAvailableMetric.tsx new file mode 100644 index 000000000..f8df4c3c4 --- /dev/null +++ b/frontend/src/components/ChatBot/NotAvailableMetric.tsx @@ -0,0 +1,20 @@ +import { Flex, IconButton, Popover, Typography } from '@neo4j-ndl/react'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; + +export default function NotAvailableMetric() { + return ( + + N.A + + + + + + + + Some metrics are not available for Gemini model. + + + + ); +} diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index ddfe92a6e..ace69a07f 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -1,6 +1,6 @@ import { FC, useContext } from 'react'; import { Chunk, SourcesProps } from '../../types'; -import { Box, LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; +import { LoadingSpinner, TextLink, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline, GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; import { getLogo, isAllowedHost, youtubeLinkValidation } from '../../utils/Utils'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; @@ -28,9 +28,9 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { return ( <> {loading ? ( - +
- +
) : mode === 'entity search+vector' && uniqueChunks.length ? (
    {uniqueChunks @@ -65,7 +65,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { {isAllowedHost(link, ['wikipedia.org']) && (
    Wikipedia Logo - + = ({ loading, mode, chunks, sources }) => { <>
    - + = ({ loading, mode, chunks, sources }) => { !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
    - + {link}
    diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 8e8516666..85837792f 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -4,33 +4,21 @@ import { Button, Typography, Flex, StatusIndicator, useMediaQuery } from '@neo4j import { useCredentials } from '../context/UserCredentials'; import { useFileContext } from '../context/UsersFiles'; import { extractAPI } from '../utils/FileAPI'; -import { - BannerAlertProps, - ChildRef, - ContentProps, - CustomFile, - OptionType, - UserCredentials, - chunkdata, - connectionState, -} from '../types'; +import { BannerAlertProps, ChildRef, ContentProps, CustomFile, OptionType, UserCredentials, chunkdata } from '../types'; import deleteAPI from '../services/DeleteFiles'; import { postProcessing } from '../services/PostProcessing'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; -import { useSearchParams } from 'react-router-dom'; import { batchSize, buttonCaptions, chatModeLables, - defaultLLM, largeFileSize, llms, RETRY_OPIONS, tooltips, } from '../utils/Constants'; import ButtonWithToolTip from './UI/ButtonWithToolTip'; -import connectAPI from '../services/ConnectAPI'; import DropdownComponent from './Dropdown'; import GraphViewModal from './Graph/GraphViewModal'; import { lazy } from 'react'; @@ -48,7 +36,6 @@ import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingC import { getChunkText } from '../services/getChunkText'; import ChunkPopUp from './Popups/ChunkPopUp'; -const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); let afterFirstRender = false; @@ -60,30 +47,17 @@ const Content: React.FC = ({ setIsSchema, showEnhancementDialog, toggleEnhancementDialog, - closeSettingModal, + setOpenConnection, + 
showDisconnectButton, + connectionStatus, }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [init, setInit] = useState(false); - const [openConnection, setOpenConnection] = useState({ - openPopUp: false, - chunksExists: false, - vectorIndexMisMatch: false, - chunksExistsWithDifferentDimension: false, - }); + // const [init, setInit] = useState(false); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); const [documentName, setDocumentName] = useState(''); - const { - setUserCredentials, - userCredentials, - connectionStatus, - setConnectionStatus, - isGdsActive, - setGdsActive, - setIsReadOnlyUser, - isReadOnlyUser, - } = useCredentials(); + const { setUserCredentials, userCredentials, setConnectionStatus, isGdsActive, isReadOnlyUser } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); const [retryFile, setRetryFile] = useState(''); @@ -105,7 +79,6 @@ const Content: React.FC = ({ filesData, setFilesData, setModel, - model, selectedNodes, selectedRels, setSelectedNodes, @@ -116,11 +89,13 @@ const Content: React.FC = ({ processedCount, setProcessedCount, setchatModes, + model, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView' | 'neighborView'>( + 'tableView' + ); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); - const [searchParams] = useSearchParams(); const { updateStatusForLargeFiles } = useServerSideEvent( @@ -133,55 +108,15 @@ const Content: React.FC = ({ } ); const childRef = useRef(null); - const incrementPage = () => { + + const incrementPage 
= async () => { setCurrentPage((prev) => prev + 1); + await getChunks(documentName, currentPage + 1); }; - const decrementPage = () => { + const decrementPage = async () => { setCurrentPage((prev) => prev - 1); + await getChunks(documentName, currentPage - 1); }; - useEffect(() => { - if (!init && !searchParams.has('connectURL')) { - let session = localStorage.getItem('neo4j.connection'); - if (session) { - let neo4jConnection = JSON.parse(session); - setUserCredentials({ - uri: neo4jConnection.uri, - userName: neo4jConnection.user, - password: atob(neo4jConnection.password), - database: neo4jConnection.database, - port: neo4jConnection.uri.split(':')[2], - }); - if (neo4jConnection.isgdsActive !== undefined) { - setGdsActive(neo4jConnection.isgdsActive); - } - if (neo4jConnection.isReadOnlyUser !== undefined) { - setIsReadOnlyUser(neo4jConnection.isReadOnlyUser); - } - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } - setInit(true); - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - } - }, []); - useEffect(() => { - if (currentPage >= 1) { - (async () => { - await getChunks(documentName, currentPage); - })(); - } - }, [currentPage, documentName]); - useEffect(() => { - setFilesData((prevfiles) => { - return prevfiles.map((curfile) => { - return { - ...curfile, - model: curfile.status === 'New' || curfile.status === 'Reprocess' ? model : curfile.model, - }; - }); - }); - }, [model]); useEffect(() => { if (afterFirstRender) { @@ -193,8 +128,44 @@ const Content: React.FC = ({ if (processedCount === 1 && queue.isEmpty()) { (async () => { showNormalToast(); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + try { + const payload = isGdsActive + ? 
postProcessingTasks + : postProcessingTasks.filter((task) => task !== 'enable_communities'); + const response = await postProcessing(userCredentials as UserCredentials, payload); + if (response.data.status === 'Success') { + const communityfiles = response.data?.data; + if (Array.isArray(communityfiles) && communityfiles.length) { + communityfiles?.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); + }); + }); + } + showSuccessToast('All Q&A functionality is available now.'); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + showSuccessToast(error.message); + } + } })(); } }, [processedCount, userCredentials, queue, isReadOnlyUser, isGdsActive]); @@ -213,65 +184,21 @@ const Content: React.FC = ({ } }, [isSchema]); - useEffect(() => { - const connection = localStorage.getItem('neo4j.connection'); - if (connection != null) { - (async () => { - const parsedData = JSON.parse(connection); - const response = await connectAPI( - parsedData.uri, - parsedData.user, - atob(parsedData.password), - parsedData.database - ); - if (response?.data?.status === 'Success') { - localStorage.setItem( - 'neo4j.connection', - JSON.stringify({ - ...parsedData, - userDbVectorIndex: response.data.data.db_vector_dimension, - password: btoa(atob(parsedData.password)), - }) - ); - if (response.data.data.gds_status !== undefined) { - setGdsActive(response.data.data.gds_status); - } - if (response.data.data.write_access !== undefined) { - setIsReadOnlyUser(!response.data.data.write_access); - } - if ( 
- (response.data.data.application_dimension === response.data.data.db_vector_dimension || - response.data.data.db_vector_dimension == 0) && - !response.data.data.chunks_exists - ) { - setConnectionStatus(true); - setOpenConnection((prev) => ({ ...prev, openPopUp: false })); - } else { - setOpenConnection({ - openPopUp: true, - chunksExists: response.data.data.chunks_exists as boolean, - vectorIndexMisMatch: - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension, - chunksExistsWithDifferentDimension: - response.data.data.db_vector_dimension > 0 && - response.data.data.db_vector_dimension != response.data.data.application_dimension && - (response.data.data.chunks_exists ?? true), - }); - setConnectionStatus(false); - } - } else { - setOpenConnection((prev) => ({ ...prev, openPopUp: true })); - setConnectionStatus(false); - } - })(); - } - }, []); - const handleDropdownChange = (selectedOption: OptionType | null | void) => { if (selectedOption?.value) { setModel(selectedOption?.value); } + setFilesData((prevfiles) => { + return prevfiles.map((curfile) => { + return { + ...curfile, + model: + curfile.status === 'New' || curfile.status === 'Ready to Reprocess' + ? selectedOption?.value ?? 
'' + : curfile.model, + }; + }); + }); }; const getChunks = async (name: string, pageNo: number) => { toggleChunksLoading(); @@ -282,6 +209,7 @@ const Content: React.FC = ({ } toggleChunksLoading(); }; + const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => { if (!isselectedRows) { const fileItem = filesData.find((f) => f.id == uid); @@ -423,7 +351,7 @@ const Content: React.FC = ({ showNormalToast(`Processing ${batch.length} files at a time.`); for (let i = 0; i < batch.length; i++) { if (newCheck) { - if (batch[i]?.status === 'New' || batch[i].status === 'Reprocess') { + if (batch[i]?.status === 'New' || batch[i].status === 'Ready to Reprocess') { data.push(extractData(batch[i].id, isSelectedFiles, selectedFiles as CustomFile[])); } } else { @@ -436,8 +364,41 @@ const Content: React.FC = ({ const addFilesToQueue = async (remainingFiles: CustomFile[]) => { if (!remainingFiles.length) { showNormalToast(); - await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + try { + const response = await postProcessing(userCredentials as UserCredentials, postProcessingTasks); + if (response.data.status === 'Success') { + const communityfiles = response.data?.data; + if (Array.isArray(communityfiles) && communityfiles.length) { + communityfiles?.forEach((c: any) => { + setFilesData((prev) => { + return prev.map((f) => { + if (f.name === c.filename) { + return { + ...f, + chunkNodeCount: c.chunkNodeCount ?? 0, + entityNodeCount: c.entityNodeCount ?? 0, + communityNodeCount: c.communityNodeCount ?? 0, + chunkRelCount: c.chunkRelCount ?? 0, + entityEntityRelCount: c.entityEntityRelCount ?? 0, + communityRelCount: c.communityRelCount ?? 
0, + nodesCount: c.nodeCount, + relationshipsCount: c.relationshipCount, + }; + } + return f; + }); + }); + }); + } + showSuccessToast('All Q&A functionality is available now.'); + } else { + throw new Error(response.data.error); + } + } catch (error) { + if (error instanceof Error) { + showSuccessToast(error.message); + } + } } for (let index = 0; index < remainingFiles.length; index++) { const f = remainingFiles[index]; @@ -544,7 +505,7 @@ const Content: React.FC = ({ } else { const selectedNewFiles = childRef.current ?.getSelectedRows() - .filter((f) => f.status === 'New' || f.status == 'Reprocess'); + .filter((f) => f.status === 'New' || f.status == 'Ready to Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } }; @@ -579,6 +540,7 @@ const Content: React.FC = ({ setProcessedCount(0); setConnectionStatus(false); localStorage.removeItem('password'); + localStorage.removeItem('selectedModel'); setUserCredentials({ uri: '', password: '', userName: '', database: '' }); setSelectedNodes([]); setSelectedRels([]); @@ -600,10 +562,10 @@ const Content: React.FC = ({ return f.name === filename ? { ...f, - status: 'Reprocess', + status: 'Ready to Reprocess', processingProgress: isStartFromBegining ? 0 : f.processingProgress, nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount, + relationshipsCount: isStartFromBegining ? 
0 : f.relationshipsCount, } : f; }); @@ -628,7 +590,8 @@ const Content: React.FC = ({ ); const newFilecheck = useMemo( - () => childRef.current?.getSelectedRows().filter((f) => f.status === 'New' || f.status == 'Reprocess').length, + () => + childRef.current?.getSelectedRows().filter((f) => f.status === 'New' || f.status == 'Ready to Reprocess').length, [childRef.current?.getSelectedRows()] ); @@ -638,7 +601,7 @@ const Content: React.FC = ({ ); const dropdowncheck = useMemo( - () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting' || f.status === 'Reprocess'), + () => !filesData.some((f) => f.status === 'New' || f.status === 'Waiting' || f.status === 'Ready to Reprocess'), [filesData] ); @@ -658,12 +621,12 @@ const Content: React.FC = ({ if (selectedRows?.length) { for (let index = 0; index < selectedRows.length; index++) { const parsedFile: CustomFile = selectedRows[index]; - if (parsedFile.status === 'New' || parsedFile.status == 'Reprocess') { + if (parsedFile.status === 'New' || parsedFile.status == 'Ready to Reprocess') { newstatusfiles.push(parsedFile); } } } else if (filesData.length) { - newstatusfiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); + newstatusfiles = filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess'); } return newstatusfiles; }, [filesData, childRef.current?.getSelectedRows()]); @@ -715,7 +678,7 @@ const Content: React.FC = ({ if ( parsedData.fileSource === 'local file' && typeof parsedData.size === 'number' && - (parsedData.status === 'New' || parsedData.status == 'Reprocess') && + (parsedData.status === 'New' || parsedData.status == 'Ready to Reprocess') && parsedData.size > largeFileSize ) { selectedLargeFiles.push(parsedData); @@ -724,16 +687,20 @@ const Content: React.FC = ({ if (selectedLargeFiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(selectedRows.filter((f) => f.status === 'New' || f.status === 'Reprocess')); + 
handleGenerateGraph(selectedRows.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess')); } } else if (filesData.length) { const largefiles = filesData.filter((f) => { - if (typeof f.size === 'number' && (f.status === 'New' || f.status == 'Reprocess') && f.size > largeFileSize) { + if ( + typeof f.size === 'number' && + (f.status === 'New' || f.status == 'Ready to Reprocess') && + f.size > largeFileSize + ) { return true; } return false; }); - const selectAllNewFiles = filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess'); + const selectAllNewFiles = filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess'); const stringified = selectAllNewFiles.reduce((accu, f) => { const key = f.id; // @ts-ignore @@ -744,7 +711,7 @@ const Content: React.FC = ({ if (largefiles.length) { setshowConfirmationModal(true); } else { - handleGenerateGraph(filesData.filter((f) => f.status === 'New' || f.status === 'Reprocess')); + handleGenerateGraph(filesData.filter((f) => f.status === 'New' || f.status === 'Ready to Reprocess')); } } }; @@ -814,24 +781,10 @@ const Content: React.FC = ({ > )} {showEnhancementDialog && ( - + )}
    - }> - -
    Neo4j connection {isReadOnlyUser ? '(Read only Mode)' : ''} @@ -840,24 +793,28 @@ const Content: React.FC = ({ isGdsActive={isGdsActive} uri={userCredentials && userCredentials?.uri} /> -
    - {!isSchema ? ( - - ) : selectedNodes.length || selectedRels.length ? ( - - ) : ( - - )} - {isSchema ? ( - - {(!selectedNodes.length || !selectedNodes.length) && 'Empty'} Graph Schema configured - {selectedNodes.length || selectedRels.length - ? `(${selectedNodes.length} Labels + ${selectedRels.length} Rel Types)` - : ''} - - ) : ( - No Graph Schema configured - )} +
    +
    + {!isSchema ? ( + + ) : selectedNodes.length || selectedRels.length ? ( + + ) : ( + + )} +
    +
    + {isSchema ? ( + + {(!selectedNodes.length || !selectedNodes.length) && 'Empty'} Graph Schema configured + {selectedNodes.length || selectedRels.length + ? `(${selectedNodes.length} Labels + ${selectedRels.length} Rel Types)` + : ''} + + ) : ( + No Graph Schema configured + )} +
    @@ -882,9 +839,11 @@ const Content: React.FC = ({ {buttonCaptions.connectToNeo4j} ) : ( - + showDisconnectButton && ( + + ) )}
    @@ -909,7 +868,7 @@ const Content: React.FC = ({ setTotalPageCount(null); } setCurrentPage(1); - // await getChunks(name, 1); + await getChunks(name, 1); } }} ref={childRef} @@ -926,7 +885,7 @@ const Content: React.FC = ({ onSelect={handleDropdownChange} options={llms ?? ['']} placeholder='Select LLM Model' - defaultValue={defaultLLM} + defaultValue={model} view='ContentView' isDisabled={false} /> diff --git a/frontend/src/components/DataSources/AWS/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx index c23b94a3b..0e9b2f49f 100644 --- a/frontend/src/components/DataSources/AWS/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -40,6 +40,12 @@ const S3Modal: React.FC = ({ hideModal, open }) => { processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; if (url) { setValid(validation(bucketUrl) && isFocused); @@ -153,55 +159,61 @@ const S3Modal: React.FC = ({ hideModal, open }) => {
    setValid(validation(bucketUrl) && isFocused), + onKeyDown: handleKeyDown, + 'aria-label': 'Bucket URL', + placeholder: 's3://data.neo4j.com/pdf/', + }} value={bucketUrl} - disabled={false} + isDisabled={false} label='Bucket URL' - aria-label='Bucket URL' - placeholder='s3://data.neo4j.com/pdf/' - autoFocus - fluid - required + isFluid={true} + isRequired={true} errorText={!isValid && isFocused && 'Please Fill The Valid URL'} - onBlur={() => setValid(validation(bucketUrl) && isFocused)} onChange={(e) => { setisFocused(true); setBucketUrl(e.target.value); }} - onKeyDown={handleKeyDown} />
    { setAccessKey(e.target.value); }} - onKeyDown={handleKeyDown} /> { setSecretKey(e.target.value); }} - onKeyDown={handleKeyDown} />
    diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx index a68faeb97..288bff00a 100644 --- a/frontend/src/components/DataSources/GCS/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -1,5 +1,5 @@ import { TextInput } from '@neo4j-ndl/react'; -import { useCallback, useEffect, useState } from 'react'; +import { useCallback, useState } from 'react'; import { useCredentials } from '../../../context/UserCredentials'; import { useFileContext } from '../../../context/UsersFiles'; import { urlScanAPI } from '../../../services/URLScan'; @@ -33,6 +33,12 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const reset = () => { @@ -41,17 +47,6 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => setprojectId(''); }; - useEffect(() => { - if (status != 'unknown') { - setTimeout(() => { - setStatusMessage(''); - setStatus('unknown'); - reset(); - hideModal(); - }, 5000); - } - }, []); - const googleLogin = useGoogleLogin({ onSuccess: async (codeResponse) => { try { @@ -195,48 +190,55 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) =>
    { setprojectId(e.target.value); }} - onKeyDown={handleKeyPress} > { setbucketName(e.target.value); }} - onKeyDown={handleKeyPress} /> { setFolderName(e.target.value); }} - onKeyDown={handleKeyPress} />
    diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx index bddbb65f0..209573eaf 100644 --- a/frontend/src/components/DataSources/Local/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -1,5 +1,5 @@ import { Dropzone, Flex, Typography } from '@neo4j-ndl/react'; -import { useState, useEffect, FunctionComponent } from 'react'; +import { useState, FunctionComponent, useEffect } from 'react'; import Loader from '../../../utils/Loader'; import { v4 as uuidv4 } from 'uuid'; import { useCredentials } from '../../../context/UserCredentials'; @@ -34,6 +34,12 @@ const DropZone: FunctionComponent = () => { processingProgress: undefined, retryOptionStatus: false, retryOption: '', + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const copiedFilesData: CustomFile[] = [...filesData]; @@ -68,7 +74,6 @@ const DropZone: FunctionComponent = () => { setFilesData(copiedFilesData); } }; - useEffect(() => { if (selectedFiles.length > 0) { for (let index = 0; index < selectedFiles.length; index++) { @@ -79,6 +84,7 @@ const DropZone: FunctionComponent = () => { } } }, [selectedFiles]); + const uploadFileInChunks = (file: File) => { const totalChunks = Math.ceil(file.size / chunkSize); const chunkProgressIncrement = 100 / totalChunks; @@ -127,7 +133,7 @@ const DropZone: FunctionComponent = () => { if (curfile.name == file.name) { return { ...curfile, - uploadprogess: chunkNumber * chunkProgressIncrement, + uploadProgress: chunkNumber * chunkProgressIncrement, }; } return curfile; @@ -139,7 +145,7 @@ const DropZone: FunctionComponent = () => { if (curfile.name == file.name) { return { ...curfile, - uploadprogess: chunkNumber * chunkProgressIncrement, + uploadProgress: chunkNumber * chunkProgressIncrement, }; } return curfile; @@ -179,7 +185,7 @@ const DropZone: FunctionComponent = () => { return { 
...curfile, status: 'New', - uploadprogess: 100, + uploadProgress: 100, }; } return curfile; diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx index d7fb1e56d..f3fbff852 100644 --- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx +++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx @@ -15,18 +15,7 @@ export default function DropZoneForSmallLayouts() { const [isLoading, setIsLoading] = useState(false); const [isClicked, setIsClicked] = useState(false); const { userCredentials } = useCredentials(); - const [selectedFiles, setSelectedFiles] = useState([]); - useEffect(() => { - if (selectedFiles.length > 0) { - for (let index = 0; index < selectedFiles.length; index++) { - const file = selectedFiles[index]; - if (filesData[index]?.status == 'None' && isClicked) { - uploadFileInChunks(file); - } - } - } - }, [selectedFiles]); const uploadFileInChunks = (file: File) => { const totalChunks = Math.ceil(file.size / chunkSize); @@ -180,6 +169,12 @@ export default function DropZoneForSmallLayouts() { processingProgress: undefined, retryOption: '', retryOptionStatus: false, + chunkNodeCount: 0, + chunkRelCount: 0, + entityNodeCount: 0, + entityEntityRelCount: 0, + communityNodeCount: 0, + communityRelCount: 0, }; const copiedFilesData: CustomFile[] = [...filesData]; @@ -214,11 +209,21 @@ export default function DropZoneForSmallLayouts() { setFilesData(copiedFilesData); } }; + useEffect(() => { + if (selectedFiles.length > 0) { + for (let index = 0; index < selectedFiles.length; index++) { + const file = selectedFiles[index]; + if (filesData[index]?.status == 'None' && isClicked) { + uploadFileInChunks(file); + } + } + } + }, [selectedFiles]); return ( <>
    - - {isLoading ? : } + + {isLoading ? : }
    ); diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 3aae5aa6f..2782addd9 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -1,4 +1,4 @@ -import { Dropdown, Tip, useMediaQuery } from '@neo4j-ndl/react'; +import { Tooltip, useMediaQuery, Select } from '@neo4j-ndl/react'; import { OptionType, ReusableDropdownProps } from '../types'; import { memo, useMemo } from 'react'; import { capitalize, capitalizeWithUnderscore } from '../utils/Utils'; @@ -17,14 +17,17 @@ const DropdownComponent: React.FC = ({ const isLargeDesktop = useMediaQuery(`(min-width:1440px )`); const handleChange = (selectedOption: OptionType | null | void) => { onSelect(selectedOption); + const existingModel = localStorage.getItem('selectedModel'); + if (existingModel != selectedOption?.value) { + localStorage.setItem('selectedModel', selectedOption?.value ?? ''); + } }; const allOptions = useMemo(() => options, [options]); return ( <>
    - LLM Model used for Extraction & Chat
    } selectProps={{ @@ -36,12 +39,12 @@ const DropdownComponent: React.FC = ({ const isModelSupported = !isProdEnv || prodllms?.includes(value); return { label: !isModelSupported ? ( - - + + {label} - - Available In Development Version - + + Available In Development Version + ) : ( {label} ), @@ -58,7 +61,10 @@ const DropdownComponent: React.FC = ({ value: value, }} size='medium' - fluid + isFluid + htmlAttributes={{ + 'aria-label': 'A selection dropdown', + }} /> {children}
    diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 42b82572a..d9b6e8f5c 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -1,5 +1,4 @@ import { - Checkbox, DataGrid, DataGridComponents, Flex, @@ -9,6 +8,7 @@ import { TextLink, Typography, useCopyToClipboard, + Checkbox, } from '@neo4j-ndl/react'; import { forwardRef, useContext, useEffect, useImperativeHandle, useMemo, useRef, useState } from 'react'; import { @@ -51,9 +51,10 @@ import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; import cancelAPI from '../services/CancelAPI'; import { IconButtonWithToolTip } from './UI/IconButtonToolTip'; import { batchSize, largeFileSize, llms } from '../utils/Constants'; -import IndeterminateCheckbox from './UI/CustomCheckBox'; import { showErrorToast, showNormalToast } from '../utils/toasts'; import { ThemeWrapperContext } from '../context/ThemeWrapper'; +import BreakDownPopOver from './BreakDownPopOver'; + let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { @@ -103,32 +104,30 @@ const FileTable = forwardRef((props, ref) => { .includes('Processing'); return ( ); }, cell: ({ row }: { row: Row }) => { return (
    -
    ); @@ -161,23 +160,25 @@ const FileTable = forwardRef((props, ref) => { if (info.getValue() != 'Processing') { return (
    - - {info.getValue()} +
    + +
    +
    {info.getValue()}
    {(info.getValue() === 'Completed' || info.getValue() === 'Failed' || info.getValue() === 'Cancelled') && !isReadOnlyUser && ( onRetry(info?.row?.id as string)} > - + )} @@ -185,16 +186,22 @@ const FileTable = forwardRef((props, ref) => { ); } else if (info.getValue() === 'Processing' && info.row.original.processingProgress === undefined) { return ( -
    - - Processing +
    +
    + +
    +
    + Processing +
    { cancelHandler( info.row.original.name as string, @@ -223,10 +230,12 @@ const FileTable = forwardRef((props, ref) => {
    { cancelHandler( info.row.original.name as string, @@ -242,9 +251,11 @@ const FileTable = forwardRef((props, ref) => { ); } return ( -
    - - {info.getValue()} +
    +
    + +
    +
    {info.getValue()}
    ); }, @@ -304,7 +315,7 @@ const FileTable = forwardRef((props, ref) => { }, }, ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -313,26 +324,32 @@ const FileTable = forwardRef((props, ref) => { cell: (info: CellContext) => { if (parseInt(info.getValue()) === 100 || info.row.original?.status === 'New') { return ( - - - Uploaded - +
    + + + + Uploaded +
    ); } else if (info.row.original?.status === 'Uploading') { return ; } else if (info.row.original?.status === 'Failed') { return ( - - - NA - +
    + + + + NA +
    ); } return ( - - - Uploaded - +
    + + + + Uploaded +
    ); }, header: () => Upload Status, @@ -355,7 +372,7 @@ const FileTable = forwardRef((props, ref) => { return ( - + {info.row.original.fileSource} @@ -400,7 +417,7 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -443,7 +460,7 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), @@ -484,19 +501,48 @@ const FileTable = forwardRef((props, ref) => { }; }), ], - defaultSortingActions: false, + hasDefaultSortingActions: false, }, }, }), columnHelper.accessor((row) => row.nodesCount, { id: 'NodesCount', - cell: (info) => {info.getValue()}, + cell: (info) => { + const hasNodeBreakDownValues = + info.row.original.chunkNodeCount > 0 || + info.row.original.communityNodeCount > 0 || + info.row.original.entityNodeCount > 0; + + return ( + + {info.getValue()} + {hasNodeBreakDownValues && + (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( + + )} + + ); + }, header: () => Nodes, footer: (info) => info.column.id, }), columnHelper.accessor((row) => row.relationshipsCount, { id: 'relationshipCount', - cell: (info) => {info.getValue()}, + cell: (info) => { + const hasRelationsBreakDownValues = + info.row.original.chunkRelCount > 0 || + info.row.original.communityRelCount > 0 || + info.row.original.entityEntityRelCount > 0; + return ( + + {info.getValue()} + {hasRelationsBreakDownValues && + (info.row.original.status === 'Completed' || info.row.original.status === 'Failed') && ( + + )} + + ); + }, header: () => Relations, footer: (info) => info.column.id, }), @@ -513,7 +559,7 @@ const FileTable = forwardRef((props, ref) => { clean onClick={() => onInspect(info?.row?.original?.name as string)} > - + ((props, ref) => { label='chunktextaction' text='View Chunks' size='large' - disabled={info.getValue() === 'Uploading'} + disabled={info.getValue() === 'Uploading' || 
info.getValue() === 'New'} > - + ), - size: 300, + maxSize: 300, minSize: 180, header: () => Actions, footer: (info) => info.column.id, @@ -704,6 +750,12 @@ const FileTable = forwardRef((props, ref) => { accessToken: item?.accessToken ?? '', retryOption: item.retry_condition ?? '', retryOptionStatus: false, + chunkNodeCount: item.chunkNodeCount ?? 0, + chunkRelCount: item.chunkRelCount ?? 0, + entityNodeCount: item.entityNodeCount ?? 0, + entityEntityRelCount: item.entityEntityRelCount ?? 0, + communityNodeCount: item.communityNodeCount ?? 0, + communityRelCount: item.communityRelCount ?? 0, }); } }); @@ -831,6 +883,12 @@ const FileTable = forwardRef((props, ref) => { status, processed_chunk = 0, total_chunks, + chunkNodeCount, + entityNodeCount, + communityNodeCount, + chunkRelCount, + entityEntityRelCount, + communityRelCount, } = file_name; if (fileName && total_chunks) { setFilesData((prevfiles) => @@ -840,10 +898,16 @@ const FileTable = forwardRef((props, ref) => { ...curfile, status: status, nodesCount: nodeCount, - relationshipCount: relationshipCount, + relationshipsCount: relationshipCount, model: model, processingTotalTime: processingTime?.toFixed(2), processingProgress: Math.floor((processed_chunk / total_chunks) * 100), + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 
0, }; } return curfile; @@ -861,7 +925,20 @@ const FileTable = forwardRef((props, ref) => { const updateProgress = (i: statusupdate) => { const { file_name } = i; - const { fileName, nodeCount = 0, relationshipCount = 0, status, processed_chunk = 0, total_chunks } = file_name; + const { + fileName, + nodeCount = 0, + relationshipCount = 0, + status, + processed_chunk = 0, + total_chunks, + chunkNodeCount, + entityNodeCount, + communityNodeCount, + chunkRelCount, + entityEntityRelCount, + communityRelCount, + } = file_name; if (fileName && total_chunks) { setFilesData((prevfiles) => prevfiles.map((curfile) => { @@ -870,8 +947,14 @@ const FileTable = forwardRef((props, ref) => { ...curfile, status: status, nodesCount: nodeCount, - relationshipCount: relationshipCount, + relationshipsCount: relationshipCount, processingProgress: Math.floor((processed_chunk / total_chunks) * 100), + chunkNodeCount: chunkNodeCount ?? 0, + entityNodeCount: entityNodeCount ?? 0, + communityNodeCount: communityNodeCount ?? 0, + chunkRelCount: chunkRelCount ?? 0, + entityEntityRelCount: entityEntityRelCount ?? 0, + communityRelCount: communityRelCount ?? 0, }; } return curfile; @@ -887,28 +970,6 @@ const FileTable = forwardRef((props, ref) => { }), [table] ); - useEffect(() => { - if (tableRef.current) { - // Component has content, calculate maximum height for table - // Observes the height of the content and calculates own height accordingly - const resizeObserver = new ResizeObserver((entries) => { - for (let index = 0; index < entries.length; index++) { - const entry = entries[index]; - const { height } = entry.contentRect; - const rowHeight = document?.getElementsByClassName('ndl-data-grid-td')?.[0]?.clientHeight ?? 
69; - table.setPageSize(Math.floor(height / rowHeight)); - } - }); - - const [contentElement] = document.getElementsByClassName('ndl-data-grid-scrollable'); - resizeObserver.observe(contentElement); - - return () => { - // Stop observing content after cleanup - resizeObserver.unobserve(contentElement); - }; - } - }, []); const classNameCheck = isExpanded ? 'fileTableWithExpansion' : `filetable`; @@ -927,7 +988,7 @@ const FileTable = forwardRef((props, ref) => { tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} isLoading={isLoading} @@ -935,7 +996,13 @@ const FileTable = forwardRef((props, ref) => { className: classNameCheck, }} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ((props, ref) => { ); }, }} + isKeyboardNavigable={false} />
    diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index b8caf7484..220b138f7 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -15,25 +15,25 @@ const CheckboxSelection: React.FC = ({
    {isDocChunk && ( handleChange('DocumentChunk')} /> )} {isEntity && ( handleChange('Entities')} /> )} {isCommunity && ( handleChange('Communities')} /> )} diff --git a/frontend/src/components/Graph/GraphPropertiesPanel.tsx b/frontend/src/components/Graph/GraphPropertiesPanel.tsx index 64332c9ac..a11399971 100644 --- a/frontend/src/components/Graph/GraphPropertiesPanel.tsx +++ b/frontend/src/components/Graph/GraphPropertiesPanel.tsx @@ -12,6 +12,15 @@ const isNode = (item: BasicNode | BasicRelationship): item is BasicNode => { const GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanelProps) => { const inspectedItemType = isNode(inspectedItem) ? 'node' : 'relationship'; + const filteredProperties = + inspectedItemType === 'node' + ? Object.entries((inspectedItem as BasicNode).properties) + .filter(([, value]) => value !== null && value !== undefined && value !== ' ') + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record) + : {}; const properties = inspectedItemType === 'node' ? [ @@ -20,9 +29,9 @@ const GraphPropertiesPanel = ({ inspectedItem, newScheme }: GraphPropertiesPanel value: `${(inspectedItem as BasicNode).id}`, type: 'String', }, - ...Object.keys((inspectedItem as BasicNode).properties).map((key) => { - const value = (inspectedItem as BasicNode).properties[key]; - return { key: key, value: value ?? '' }; + ...Object.keys(filteredProperties).map((key) => { + const value = filteredProperties[key]; + return { key, value }; }), ] : [ diff --git a/frontend/src/components/Graph/GraphPropertiesTable.tsx b/frontend/src/components/Graph/GraphPropertiesTable.tsx index fa270455b..fcabb0103 100644 --- a/frontend/src/components/Graph/GraphPropertiesTable.tsx +++ b/frontend/src/components/Graph/GraphPropertiesTable.tsx @@ -16,8 +16,10 @@ const GraphPropertiesTable = ({ propertiesWithTypes }: GraphPropertiesTableProps
    {key} diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index ac35a93ae..1360d8418 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,4 +1,4 @@ -import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, useDebounce } from '@neo4j-ndl/react'; +import { Banner, Dialog, Flex, IconButtonArray, LoadingSpinner, useDebounceValue } from '@neo4j-ndl/react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { BasicNode, @@ -17,6 +17,7 @@ import type { Node, Relationship } from '@neo4j-nvl/base'; import { ArrowPathIconOutline, FitToScreenIcon, + InformationCircleIconOutline, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; @@ -53,10 +54,11 @@ const GraphViewModal: React.FunctionComponent = ({ const [scheme, setScheme] = useState({}); const [newScheme, setNewScheme] = useState({}); const [searchQuery, setSearchQuery] = useState(''); - const debouncedQuery = useDebounce(searchQuery, 300); + const [debouncedQuery] = useDebounceValue(searchQuery, 300); const [graphType, setGraphType] = useState([]); const [disableRefresh, setDisableRefresh] = useState(false); const [selected, setSelected] = useState<{ type: EntityType; id: string } | undefined>(undefined); + const [mode, setMode] = useState(false); const graphQuery: string = graphType.includes('DocumentChunk') && graphType.includes('Entities') @@ -96,7 +98,12 @@ const GraphViewModal: React.FunctionComponent = ({ }, []); useEffect(() => { - const updateGraphType = graphTypeFromNodes(allNodes); + let updateGraphType; + if (mode) { + updateGraphType = graphTypeFromNodes(nodes); + } else { + updateGraphType = graphTypeFromNodes(allNodes); + } if (Array.isArray(updateGraphType)) { setGraphType(updateGraphType); } @@ -259,11 +266,10 @@ const GraphViewModal: React.FunctionComponent = ({ const newGraphSelected = 
[...graphType]; if (currentIndex === -1) { newGraphSelected.push(graph); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); } else { newGraphSelected.splice(currentIndex, 1); - initGraph(newGraphSelected, allNodes, allRelationships, scheme); } + initGraph(newGraphSelected, allNodes, allRelationships, scheme); setSearchQuery(''); setGraphType(newGraphSelected); setSelected(undefined); @@ -294,11 +300,8 @@ const GraphViewModal: React.FunctionComponent = ({ // Refresh the graph with nodes and relations if file is processing const handleRefresh = () => { setDisableRefresh(true); + setMode(true); graphApi('refreshMode'); - setGraphType(graphType); - setNodes(nodes); - setRelationships(relationships); - setScheme(newScheme); }; // when modal closes reset all states to default @@ -339,13 +342,23 @@ const GraphViewModal: React.FunctionComponent = ({ id: 'default-menu', }} size='unset' - open={open} - aria-labelledby='form-dialog-title' - disableCloseButton={false} + isOpen={open} + hasDisabledCloseButton={false} onClose={onClose} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - + {headerTitle} + {viewPoint !== graphLabels.chatInfoView && ( +
    + + + + {graphLabels.chunksInfo} +
    + )} {checkBoxView && ( = ({
    ) : status !== 'unknown' ? (
    - +
    ) : nodes.length === 0 && relationships.length === 0 && graphType.length !== 0 ? (
    - +
    ) : graphType.length === 0 && checkBoxView ? (
    - +
    ) : ( <> @@ -390,7 +403,7 @@ const GraphViewModal: React.FunctionComponent = ({ }} nvlCallbacks={nvlCallbacks} /> - + {viewPoint !== 'chatInfoView' && ( = ({ placement='left' disabled={disableRefresh} > - + )} - + - + = ({ onClick={handleZoomToFit} placement='left' > - +
    diff --git a/frontend/src/components/Graph/ResultOverview.tsx b/frontend/src/components/Graph/ResultOverview.tsx index 142c9fabc..6c3de167a 100644 --- a/frontend/src/components/Graph/ResultOverview.tsx +++ b/frontend/src/components/Graph/ResultOverview.tsx @@ -114,17 +114,25 @@ const ResultOverview: React.FunctionComponent = ({ {graphLabels.resultOverview}
    { setSearchQuery(e.target.value); }} - placeholder='Search On Node Properties' - fluid={true} - leftIcon={ - - + isFluid={true} + leftElement={ + + } /> diff --git a/frontend/src/components/Layout/DrawerChatbot.tsx b/frontend/src/components/Layout/DrawerChatbot.tsx index 3150e96a5..f00558aa0 100644 --- a/frontend/src/components/Layout/DrawerChatbot.tsx +++ b/frontend/src/components/Layout/DrawerChatbot.tsx @@ -2,16 +2,25 @@ import { Drawer } from '@neo4j-ndl/react'; import Chatbot from '../ChatBot/Chatbot'; import { DrawerChatbotProps, Messages } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; +import { useLocation } from 'react-router'; +import { useEffect } from 'react'; const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryData, messages, connectionStatus }) => { const { setMessages } = useMessageContext(); + const location = useLocation(); + + useEffect(() => { + if (location && location.state) { + setMessages(location.state); + } + }, [location]); const getIsLoading = (messages: Messages[]) => { return messages.some((msg) => msg.isTyping || msg.isLoading); }; return (
    - + = ({ showGCSModal, showGenericModal, }) => { - const [isBackendConnected, setIsBackendConnected] = useState(false); const { closeAlert, alertState } = useAlertContext(); - const { isReadOnlyUser } = useCredentials(); - - useEffect(() => { - async function getHealthStatus() { - try { - const response = await healthStatus(); - setIsBackendConnected(response.data.healthy); - } catch (error) { - setIsBackendConnected(false); - } - } - getHealthStatus(); - }, []); + const { isReadOnlyUser, isBackendConnected } = useCredentials(); const isYoutubeOnlyCheck = useMemo( () => APP_SOURCES?.includes('youtube') && !APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('web'), @@ -55,9 +41,9 @@ const DrawerDropzone: React.FC = ({ return (
    - + {!isReadOnlyUser ? ( - + {alertState.showAlert && ( = ({ }`} > {process.env.VITE_ENV != 'PROD' && ( - + {!isBackendConnected ? : } @@ -211,7 +197,7 @@ const DrawerDropzone: React.FC = ({
    ) : ( - + This user account does not have permission to access or manage data sources. diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 752c31660..2cd243b29 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -5,97 +5,266 @@ import { SunIconOutline, CodeBracketSquareIconOutline, InformationCircleIconOutline, + ArrowTopRightOnSquareIconOutline, + TrashIconOutline, + ArrowLeftIconOutline, + ArrowDownTrayIconOutline, } from '@neo4j-ndl/react/icons'; -import { Typography } from '@neo4j-ndl/react'; -import { memo, useCallback, useContext, useEffect } from 'react'; +import { Button, TextLink, Typography } from '@neo4j-ndl/react'; +import { Dispatch, memo, SetStateAction, useCallback, useContext, useEffect, useRef, useState } from 'react'; import { IconButtonWithToolTip } from '../UI/IconButtonToolTip'; -import { tooltips } from '../../utils/Constants'; +import { buttonCaptions, tooltips } from '../../utils/Constants'; import { useFileContext } from '../../context/UsersFiles'; import { ThemeWrapperContext } from '../../context/ThemeWrapper'; +import { useCredentials } from '../../context/UserCredentials'; +import { useNavigate } from 'react-router'; +import { useMessageContext } from '../../context/UserMessages'; +import { RiChatSettingsLine } from 'react-icons/ri'; +import ChatModeToggle from '../ChatBot/ChatModeToggle'; +import { connectionState } from '../../types'; +import { downloadClickHandler, getIsLoading } from '../../utils/Utils'; -function Header() { - const { colorMode, toggleColorMode } = useContext(ThemeWrapperContext); +interface HeaderProp { + chatOnly?: boolean; + deleteOnClick?: () => void; + setOpenConnection?: Dispatch>; + showBackButton?: boolean; +} +const Header: React.FC = ({ chatOnly, deleteOnClick, setOpenConnection, showBackButton }) => { + const { colorMode, toggleColorMode } = useContext(ThemeWrapperContext); + const navigate = 
useNavigate(); + const { messages } = useMessageContext(); const handleURLClick = useCallback((url: string) => { window.open(url, '_blank'); }, []); - + const downloadLinkRef = useRef(null); const { isSchema, setIsSchema } = useFileContext(); - + const { connectionStatus } = useCredentials(); + const chatAnchor = useRef(null); + const [showChatModeOption, setshowChatModeOption] = useState(false); useEffect(() => { setIsSchema(isSchema); }, [isSchema]); + const openChatPopout = useCallback(() => { + let session = localStorage.getItem('neo4j.connection'); + const isLoading = getIsLoading(messages); + if (session) { + const neo4jConnection = JSON.parse(session); + const { uri } = neo4jConnection; + const userName = neo4jConnection.user; + const { password } = neo4jConnection; + const { database } = neo4jConnection; + const [, port] = uri.split(':'); + const encodedPassword = btoa(password); + const chatUrl = `/chat-only?uri=${encodeURIComponent( + uri + )}&user=${userName}&password=${encodedPassword}&database=${database}&port=${port}&connectionStatus=${connectionStatus}`; + navigate(chatUrl, { state: { messages, isLoading } }); + } else { + const chatUrl = `/chat-only?openModal=true`; + window.open(chatUrl, '_blank'); + } + }, [messages]); + + const onBackButtonClick = () => { + navigate('/', { state: messages }); + }; return ( -
    - -
    + {showBackButton && ( + + + + )} + + {colorMode === 'dark' ? ( + + + + ) : ( + + + + )} + +
    + { + setshowChatModeOption(true); + }} + clean + text='Chat mode' + placement='bottom' + label='Chat mode' + > + + +
    + <> + + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + ) + } + disabled={messages.length === 1 || getIsLoading(messages)} + placement={chatOnly ? 'left' : 'bottom'} + label={tooltips.downloadChat} + > + + + + <> + + "" + + + + + + +
    + + )} + +
    + { + if (reason.type === 'backdropClick') { + setshowChatModeOption(false); + } + }} + open={showChatModeOption} + menuAnchor={chatAnchor} + isRoot={false} + /> + ); -} +}; export default memo(Header); diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index bf3082fc8..9fd142b29 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -1,28 +1,32 @@ -import { useReducer, useState } from 'react'; +import { lazy, Suspense, useEffect, useReducer, useState } from 'react'; import SideNav from './SideNav'; import DrawerDropzone from './DrawerDropzone'; import DrawerChatbot from './DrawerChatbot'; import Content from '../Content'; -import SettingsModal from '../Popups/Settings/SettingModal'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; -import { UserCredentials } from '../../types'; +import { connectionState, UserCredentials } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; import { useMediaQuery } from '@mui/material'; import { useFileContext } from '../../context/UsersFiles'; import SchemaFromTextDialog from '../Popups/Settings/SchemaFromText'; +import useSpeechSynthesis from '../../hooks/useSpeech'; +import FallBackDialog from '../UI/FallBackDialog'; +import { envConnectionAPI } from '../../services/ConnectAPI'; +import { healthStatus } from '../../services/HealthStatus'; +import { useNavigate } from 'react-router'; -export default function PageLayoutNew({ - isSettingPanelExpanded, - closeSettingModal, - openSettingsDialog, -}: { - isSettingPanelExpanded: boolean; - closeSettingModal: () => void; - openSettingsDialog: () => void; -}) { +const ConnectionModal = lazy(() => import('../Popups/ConnectionModal/ConnectionModal')); + +const PageLayout: React.FC = () => { + const [openConnection, setOpenConnection] = useState({ + openPopUp: false, + 
chunksExists: false, + vectorIndexMisMatch: false, + chunksExistsWithDifferentDimension: false, + }); const largedesktops = useMediaQuery(`(min-width:1440px )`); - const { userCredentials, connectionStatus } = useCredentials(); + const { userCredentials, connectionStatus, setIsReadOnlyUser } = useCredentials(); const [isLeftExpanded, setIsLeftExpanded] = useState(Boolean(largedesktops)); const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); @@ -31,6 +35,7 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); + const navigate = useNavigate(); const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); @@ -46,18 +51,184 @@ export default function PageLayoutNew({ } }; - const { messages, setClearHistoryData, clearHistoryData } = useMessageContext(); + const { messages, setClearHistoryData, clearHistoryData, setMessages } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); + const { + setConnectionStatus, + setGdsActive, + setIsBackendConnected, + setUserCredentials, + setErrorMessage, + setShowDisconnectButton, + showDisconnectButton, + } = useCredentials(); + const { cancel } = useSpeechSynthesis(); + + useEffect(() => { + async function initializeConnection() { + const session = localStorage.getItem('neo4j.connection'); + const environment = process.env.VITE_ENV; + const isDev = environment === 'DEV'; + // Fetch backend health status + try { + const response = await healthStatus(); + setIsBackendConnected(response.data.healthy); + } catch (error) { + setIsBackendConnected(false); + } + // To set the disconnect button state + const handleDisconnectButtonState = (isModalOpen: boolean) 
=> { + setShowDisconnectButton(isModalOpen); + localStorage.setItem('disconnectButtonState', isModalOpen ? 'true' : 'false'); + }; + // To parse and set user credentials from session + const setUserCredentialsFromSession = (neo4jConnection: string) => { + if (!neo4jConnection) { + console.error('Invalid session data:', neo4jConnection); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + return; + } + try { + const parsedConnection = JSON.parse(neo4jConnection); + if (parsedConnection.uri && parsedConnection.user && parsedConnection.password && parsedConnection.database) { + setUserCredentials({ + uri: parsedConnection.uri, + userName: parsedConnection.user, + password: atob(parsedConnection.password), + database: parsedConnection.database, + }); + setGdsActive(parsedConnection.isGDS); + setIsReadOnlyUser(parsedConnection.isReadOnlyUser); + } else { + console.error('Invalid parsed session data:', parsedConnection); + } + } catch (error) { + console.error('Failed to parse session data:', error); + } + }; + // To update credentials if environment values differ + const updateSessionIfNeeded = (envCredentials: UserCredentials, storedSession: string) => { + try { + const storedCredentials = JSON.parse(storedSession); + const isDiffCreds = + envCredentials.uri !== storedCredentials.uri || + envCredentials.userName !== storedCredentials.user || + btoa(envCredentials.password) !== storedCredentials.password || + envCredentials.database !== storedCredentials.database; + if (isDiffCreds) { + setUserCredentials(envCredentials); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: envCredentials.uri, + user: envCredentials.userName, + password: btoa(envCredentials.password), + database: envCredentials.database, + userDbVectorIndex: 384, + isReadOnlyUser: envCredentials.isReadonlyUser, + isGDS: envCredentials.isGds, + }) + ); + return true; + } + return false; + } catch (error) { + console.error('Failed to update session:', error); + return 
false; + } + }; + // Handle case where session exists + let backendApiResponse; + try { + if (isDev) { + backendApiResponse = await envConnectionAPI(); + const connectionData = backendApiResponse.data; + const envCredentials = { + uri: connectionData.data.uri, + password: atob(connectionData.data.password), + userName: connectionData.data.user_name, + database: connectionData.data.database, + isReadonlyUser: !connectionData.data.write_access, + isGds: connectionData.data.gds_status, + }; + if (session && isDev) { + const updated = updateSessionIfNeeded(envCredentials, session); + if (!updated) { + setUserCredentialsFromSession(session); // Using stored session if no update is needed + } + setConnectionStatus(Boolean(connectionData.data.graph_connection)); + setIsBackendConnected(true); + handleDisconnectButtonState(false); + } else if (!session) { + setUserCredentials(envCredentials); + localStorage.setItem( + 'neo4j.connection', + JSON.stringify({ + uri: envCredentials.uri, + user: envCredentials.userName, + password: btoa(envCredentials.password), + database: envCredentials.database, + userDbVectorIndex: 384, + isReadOnlyUser: envCredentials.isReadonlyUser, + isGDS: envCredentials.isGds, + }) + ); + setConnectionStatus(true); + setGdsActive(envCredentials.isGds); + setIsReadOnlyUser(envCredentials.isReadonlyUser); + handleDisconnectButtonState(false); + } + } else if (session && !isDev) { + // For PROD, picking the session values + setUserCredentialsFromSession(session as string); + setConnectionStatus(true); + handleDisconnectButtonState(true); + + } else { + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + handleDisconnectButtonState(true); + } + } catch (error) { + console.error('Error in DEV session handling:', error); + if (session) { + setUserCredentialsFromSession(session as string); + setConnectionStatus(true); + } else { + setErrorMessage(backendApiResponse?.data.error); + setOpenConnection((prev) => ({ ...prev, openPopUp: true })); + } + 
handleDisconnectButtonState(true); + } + } + initializeConnection(); + }, []); const deleteOnClick = async () => { try { setClearHistoryData(true); + cancel(); const response = await clearChatAPI( userCredentials as UserCredentials, sessionStorage.getItem('session_id') ?? '' ); if (response.data.status === 'Success') { - setClearHistoryData(false); + const date = new Date(); + + setMessages([ + { + datetime: `${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, + id: 2, + modes: { + 'graph+vector+fulltext': { + message: + ' Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.', + }, + }, + user: 'chatbot', + currentMode: 'graph+vector+fulltext', + }, + ]); + navigate('.', { replace: true, state: null }); } } catch (error) { console.log(error); @@ -67,6 +238,16 @@ export default function PageLayoutNew({ return (
    + }> + + { setShowTextFromSchemaDialog({ triggeredFrom: '', show: false }); switch (showTextFromSchemaDialog.triggeredFrom) { case 'enhancementtab': toggleEnhancementDialog(); break; - case 'schemadialog': - openSettingsDialog(); - break; default: break; } }} > - { - setShowTextFromSchemaDialog({ triggeredFrom: 'schemadialog', show: true }); - }} - open={isSettingPanelExpanded} - onClose={closeSettingModal} - settingView='headerView' - isSchema={isSchema} - setIsSchema={setIsSchema} - /> setShowChatBot(true)} isLeftExpanded={isLeftExpanded} @@ -123,7 +290,9 @@ export default function PageLayoutNew({ setIsSchema={setIsSchema} showEnhancementDialog={showEnhancementDialog} toggleEnhancementDialog={toggleEnhancementDialog} - closeSettingModal={closeSettingModal} + setOpenConnection={setOpenConnection} + showDisconnectButton={showDisconnectButton} + connectionStatus={connectionStatus} /> {showDrawerChatbot && (
    ); -} +}; + +export default PageLayout; diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index e09ac434a..731469e6b 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -1,14 +1,15 @@ -import React, { useEffect, useRef, useState } from 'react'; -import { Dialog, SideNavigation, TextLink, Tip, useMediaQuery } from '@neo4j-ndl/react'; +import React, { useRef, useState } from 'react'; +import { Dialog, SideNavigation, TextLink, Tooltip, useMediaQuery } from '@neo4j-ndl/react'; import { ArrowRightIconOutline, ArrowLeftIconOutline, - TrashIconOutline, ArrowsPointingOutIconOutline, ChatBubbleOvalLeftEllipsisIconOutline, CloudArrowUpIconSolid, ArrowDownTrayIconOutline, + TrashIconOutline, } from '@neo4j-ndl/react/icons'; + import { SideNavProps } from '../../types'; import Chatbot from '../ChatBot/Chatbot'; import { createPortal } from 'react-dom'; @@ -24,7 +25,7 @@ import S3Component from '../DataSources/AWS/S3Bucket'; import WebButton from '../DataSources/Web/WebButton'; import DropZoneForSmallLayouts from '../DataSources/Local/DropZoneForSmallLayouts'; import { useCredentials } from '../../context/UserCredentials'; -import TipWrapper from '../UI/TipWrapper'; +import TooltipWrapper from '../UI/TipWrapper'; const SideNav: React.FC = ({ position, @@ -43,31 +44,11 @@ const SideNav: React.FC = ({ const [isChatModalOpen, setIsChatModalOpen] = useState(false); const [isFullScreen, setIsFullScreen] = useState(false); const { setMessages } = useMessageContext(); - const [chatModeAnchor, setchatModeAnchor] = useState(null); const [showChatMode, setshowChatMode] = useState(false); const largedesktops = useMediaQuery(`(min-width:1440px )`); const { connectionStatus, isReadOnlyUser } = useCredentials(); const downloadLinkRef = useRef(null); - - const date = new Date(); - useEffect(() => { - if (clearHistoryData) { - setMessages([ - { - datetime: 
`${date.toLocaleDateString()} ${date.toLocaleTimeString()}`, - id: 2, - modes: { - 'graph+vector+fulltext': { - message: - ' Welcome to the Neo4j Knowledge Graph Chat. You can ask questions related to documents which have been completely processed.', - }, - }, - user: 'chatbot', - currentMode: 'graph+vector+fulltext', - }, - ]); - } - }, [clearHistoryData]); + const anchorMenuRef = useRef(null); const handleExpandClick = () => { setIsChatModalOpen(true); @@ -98,32 +79,38 @@ const SideNav: React.FC = ({ return (
    - + {isExpanded && largedesktops && ( : } + htmlAttributes={{ onClick: handleClick }} + icon={ + position === 'left' ? ( + + ) : ( + + ) + } /> )} {!isExpanded && position === 'left' && largedesktops && ( - - + + + } /> )} {position === 'right' && !isExpanded && ( - - + + + } /> )} @@ -131,108 +118,119 @@ const SideNav: React.FC = ({ {!largedesktops && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('gcs') && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('s3') && position === 'left' && !isReadOnlyUser && ( + - + } /> )} {!largedesktops && APP_SOURCES.includes('web') && position === 'left' && !isReadOnlyUser && ( + - + } > )} {position === 'right' && isExpanded && ( <> - + - - - - {tooltips.clearChat} + + + + {tooltips.clearChat} } /> - - + + - + - - {tooltips.maximise} + + {tooltips.maximise} } /> - - + + { - downloadClickHandler( - { conversation: messages }, - downloadLinkRef, - 'graph-builder-conversation.json' - ); + htmlAttributes={{ + onClick: () => { + downloadClickHandler( + { conversation: messages }, + downloadLinkRef, + 'graph-builder-conversation.json' + ); + }, }} icon={ <> - + - - + + Download Conversation "" - + } /> - + {!isChatModalOpen && ( { - setchatModeAnchor(e.currentTarget); - setshowChatMode(true); - }} + ref={anchorMenuRef} icon={ <> - + { + setshowChatMode(true); + }} + size='small' + placement='left' + clean + label='Chat mode' + text='Chat mode' + > setshowChatMode(false)} - menuAnchor={chatModeAnchor} - disableBackdrop={true} - anchorPortal={true} + closeHandler={(_, reason) => { + if (reason.type === 'backdropClick') { + setshowChatMode(false); + } + }} + menuAnchor={anchorMenuRef} + isRoot={false} > } @@ -249,12 +247,13 @@ const SideNav: React.FC = ({ id: 'Chatbot-popup', className: 'n-p-token-4 n-rounded-lg h-[90%]', }} - open={isChatModalOpen} + isOpen={isChatModalOpen} size='unset' - disableCloseButton={true} + 
hasDisabledCloseButton={true} > - + { + + const { breakpoints } = tokens; + const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); const sortedChunksData = useMemo(() => { return chunks.sort((a, b) => a.position - b.position); }, [chunks]); return ( - - Text Chunks + + +
    + +
    + Text Chunks + + These text chunks are extracted to build a knowledge graph and enable accurate information retrieval using + a different retrival strategies + +
    +
    + {!chunksLoading && totalPageCount != null && totalPageCount > 0 && ( +
    + Total Pages: {totalPageCount} +
    + )} +
    {chunksLoading ? ( ) : ( -
      +
        {sortedChunksData.map((c, idx) => ( -
      1. - +
      2. + Position : {c.position} @@ -57,10 +81,10 @@ const ChunkPopUp = ({ {totalPageCount != null && totalPageCount > 1 && ( - + - + diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 76df732a4..fc9462c12 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -1,6 +1,6 @@ -import { Button, Dialog, TextInput, Dropdown, Banner, Dropzone, Typography, TextLink, Flex } from '@neo4j-ndl/react'; +import { Button, Dialog, TextInput, Select, Banner, Dropzone, Typography, TextLink, Flex } from '@neo4j-ndl/react'; import React, { useCallback, useEffect, useMemo, useState, useRef } from 'react'; -import connectAPI from '../../../services/ConnectAPI'; +import { connectAPI } from '../../../services/ConnectAPI'; import { useCredentials } from '../../../context/UserCredentials'; import { useSearchParams } from 'react-router-dom'; import { buttonCaptions } from '../../../utils/Constants'; @@ -41,7 +41,7 @@ export default function ConnectionModal({ const [username, setUsername] = useState(initialusername ?? 'neo4j'); const [password, setPassword] = useState(''); const [connectionMessage, setMessage] = useState({ type: 'unknown', content: '' }); - const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser } = useCredentials(); + const { setUserCredentials, userCredentials, setGdsActive, setIsReadOnlyUser, errorMessage } = useCredentials(); const [isLoading, setIsLoading] = useState(false); const [searchParams, setSearchParams] = useSearchParams(); const [userDbVectorIndex, setUserDbVectorIndex] = useState(initialuserdbvectorindex ?? 
undefined); @@ -123,6 +123,12 @@ export default function ConnectionModal({ } }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding, userCredentials]); + useEffect(() => { + if (errorMessage) { + setMessage({ type: 'danger', content: errorMessage }); + } + }, [errorMessage]); + const parseAndSetURI = (uri: string, urlparams = false) => { const uriParts: string[] = uri.split('://'); let uriHost: string[] | string; @@ -311,18 +317,20 @@ export default function ConnectionModal({ <> { setOpenConnection((prev) => ({ ...prev, openPopUp: false })); setMessage({ type: 'unknown', content: '' }); }} - disableCloseButton={vectorIndexLoading} + hasDisabledCloseButton={vectorIndexLoading} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - Connect to Neo4j + Connect to Neo4j - + Don't have a Neo4j instance? Start for free today @@ -330,17 +338,19 @@ export default function ConnectionModal({ (vectorIndexLoading ? ( ) : ( ))}
        @@ -363,87 +373,97 @@ export default function ConnectionModal({ />
        - newValue && setProtocol(newValue.value), options: protocols.map((option) => ({ label: option, value: option })), value: { label: protocol, value: protocol }, }} className='w-1/4 inline-block' - fluid + isFluid + htmlAttributes={{ + id: 'protocol', + }} />
        handleHostPasteChange(e), + onKeyDown: (e) => handleKeyPress(e, databaseRef), + 'aria-label': 'Connection URI', + }} value={URI} - disabled={false} + isDisabled={false} label='URI' - autoFocus - fluid + isFluid={true} onChange={(e) => setURI(e.target.value)} - onPaste={(e) => handleHostPasteChange(e)} - aria-label='Connection URI' - onKeyDown={(e) => handleKeyPress(e, databaseRef)} />
        setDatabase(e.target.value)} className='w-full' - onKeyDown={handleKeyPress} />
        setUsername(e.target.value)} - onKeyDown={handleKeyPress} />
        setPassword(e.target.value)} - onKeyDown={handleKeyPress} />
        -
        diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 330f5f657..57192f8b4 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react'; import { getDuplicateNodes } from '../../../../services/GetDuplicateNodes'; import { useCredentials } from '../../../../context/UserCredentials'; import { dupNodes, selectedDuplicateNodes, UserCredentials } from '../../../../types'; @@ -30,6 +30,7 @@ import mergeDuplicateNodes from '../../../../services/MergeDuplicateEntities'; import { tokens } from '@neo4j-ndl/base'; import GraphViewModal from '../../../Graph/GraphViewModal'; import { handleGraphNodeClick } from '../../../ChatBot/chatInfo'; +import { ThemeWrapperContext } from '../../../../context/ThemeWrapper'; export default function DeduplicationTab() { const { breakpoints } = tokens; @@ -46,6 +47,8 @@ export default function DeduplicationTab() { const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); const [nodesCount, setNodesCount] = useState(0); + const { colorMode } = useContext(ThemeWrapperContext); + const fetchDuplicateNodes = useCallback(async () => { try { setLoading(true); @@ -66,10 +69,13 @@ export default function DeduplicationTab() { console.log(error); } }, [userCredentials]); + useEffect(() => { - (async () => { - await fetchDuplicateNodes(); - })(); + if (userCredentials != null) { + (async () => { + await fetchDuplicateNodes(); + })(); + } }, [userCredentials]); const clickHandler = async () => { @@ -127,8 +133,8 @@ export default function DeduplicationTab() { header: ({ table }: { table: 
Table }) => { return ( ); @@ -137,10 +143,10 @@ export default function DeduplicationTab() { return (
        ); @@ -154,8 +160,10 @@ export default function DeduplicationTab() {
        handleDuplicateNodeClick(info.row.id, 'chatInfoView')} - title={info.getValue()} + htmlAttributes={{ + onClick: () => handleDuplicateNodeClick(info.row.id, 'chatInfoView'), + title: info.getValue(), + }} > {info.getValue()} @@ -179,7 +187,7 @@ export default function DeduplicationTab() { onRemove={() => { onRemove(info.row.original.e.elementId, s.elementId); }} - removeable={true} + isRemovable={true} type='default' size={isTablet ? 'small' : 'medium'} > @@ -286,7 +294,7 @@ export default function DeduplicationTab() { tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} rootProps={{ @@ -294,7 +302,13 @@ export default function DeduplicationTab() { }} isLoading={isLoading} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ([]); const [openGraphView, setOpenGraphView] = useState(false); const [viewPoint, setViewPoint] = useState(''); + const { colorMode } = useContext(ThemeWrapperContext); const fetchOrphanNodes = useCallback(async () => { try { @@ -62,9 +65,11 @@ export default function DeletePopUpForOrphanNodes({ }, [userCredentials]); useEffect(() => { - (async () => { - await fetchOrphanNodes(); - })(); + if (userCredentials != null) { + (async () => { + await fetchOrphanNodes(); + })(); + } return () => { setOrphanNodes([]); setTotalOrphanNodes(0); @@ -91,8 +96,8 @@ export default function DeletePopUpForOrphanNodes({ header: ({ table }: { table: Table }) => { return ( ); @@ -101,10 +106,10 @@ export default function DeletePopUpForOrphanNodes({ return (
        ); @@ -118,8 +123,10 @@ export default function DeletePopUpForOrphanNodes({
        handleOrphanNodeClick(info.row.id, 'chatInfoView')} - title={info.getValue()} + htmlAttributes={{ + onClick: () => handleOrphanNodeClick(info.row.id, 'chatInfoView'), + title: info.getValue(), + }} > {info.getValue()} @@ -254,7 +261,7 @@ export default function DeletePopUpForOrphanNodes({ tableInstance={table} styling={{ borderStyle: 'all-sides', - zebraStriping: true, + hasZebraStriping: true, headerStyle: 'clean', }} rootProps={{ @@ -262,7 +269,13 @@ export default function DeletePopUpForOrphanNodes({ }} isLoading={isLoading} components={{ - Body: (props) => , + Body: () => ( + + ), PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => { return ( ([]); const [relationshipTypeOptions, setrelationshipTypeOptions] = useState([]); - const [defaultExamples, setdefaultExamples] = useState([]); + const defaultExamples = useMemo(() => getDefaultSchemaExamples(), []); - useEffect(() => { - const parsedData = schemaExamples.reduce((accu: OptionType[], example) => { - const examplevalues: OptionType = { - label: example.schema, - value: JSON.stringify({ - nodelabels: example.labels, - relationshipTypes: example.relationshipTypes, - }), - }; - accu.push(examplevalues); - return accu; - }, []); - setdefaultExamples(parsedData); - }, []); useEffect(() => { if (userCredentials) { if (open && view === 'Dialog') { @@ -265,15 +250,6 @@ export default function EntityExtractionSetting({ ); }; - // Load selectedSchemas from local storage on mount - useEffect(() => { - const storedSchemas = localStorage.getItem('selectedSchemas'); - if (storedSchemas) { - const parsedSchemas = JSON.parse(storedSchemas); - setSelectedSchemas(parsedSchemas.selectedOptions); - } - }, []); - return (
        @@ -290,7 +266,7 @@ export default function EntityExtractionSetting({
        {appLabels.predefinedSchema}
        -
        {appLabels.ownSchema}
        - - } - checked={true} - disabled={true} - aria-label='Selected-postprocessing-jobs' + isChecked={true} + isDisabled={true} + ariaLabel='Selected-postprocessing-jobs' /> ); })} diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index b48b6c6a6..d5ebc04a2 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -32,7 +32,7 @@ export default function PostProcessingCheckList() { .join(' ')} } - checked={ + isChecked={ isCreateCommunities ? isGdsActive && postProcessingTasks.includes(job.title) : postProcessingTasks.includes(job.title) @@ -44,8 +44,8 @@ export default function PostProcessingCheckList() { setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title)); } }} - disabled={isCreateCommunities && !isGdsActive} - aria-label='checkbox-postProcessing' + isDisabled={isCreateCommunities && !isGdsActive} + ariaLabel='checkbox-postProcessing' /> {job.description}
        diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx index c7b621748..4226099f9 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/index.tsx @@ -1,6 +1,6 @@ -import { Dialog, Tabs, Box, Typography, Flex, useMediaQuery } from '@neo4j-ndl/react'; +import { Dialog, Tabs, Typography, Flex, useMediaQuery } from '@neo4j-ndl/react'; import graphenhancement from '../../../assets/images/graph-enhancements.svg'; -import { useEffect, useState } from 'react'; +import { useState } from 'react'; import DeletePopUpForOrphanNodes from './DeleteTabForOrphanNodes'; import deleteOrphanAPI from '../../../services/DeleteOrphanNodes'; import { UserCredentials } from '../../../types'; @@ -11,15 +11,7 @@ import DeduplicationTab from './Deduplication'; import { tokens } from '@neo4j-ndl/base'; import PostProcessingCheckList from './PostProcessingCheckList'; -export default function GraphEnhancementDialog({ - open, - onClose, - closeSettingModal, -}: { - open: boolean; - onClose: () => void; - closeSettingModal: () => void; -}) { +export default function GraphEnhancementDialog({ open, onClose }: { open: boolean; onClose: () => void }) { const { breakpoints } = tokens; const [orphanDeleteAPIloading, setorphanDeleteAPIloading] = useState(false); const { setShowTextFromSchemaDialog } = useFileContext(); @@ -36,9 +28,6 @@ export default function GraphEnhancementDialog({ console.log(error); } }; - useEffect(() => { - closeSettingModal(); - }, []); const [activeTab, setactiveTab] = useState(0); return ( @@ -47,14 +36,14 @@ export default function GraphEnhancementDialog({ id: 'graph-enhancement-popup', className: 'n-p-token-4 n-rounded-lg', }} - open={open} + isOpen={open} size='unset' - disableCloseButton={false} + hasDisabledCloseButton={false} onClose={onClose} > - - +
        +
        - +
        Graph Enhancements {isTablet @@ -76,23 +65,43 @@ export default function GraphEnhancementDialog({ - + Entity Extraction Settings - + Disconnected Nodes - + De-Duplication Of Nodes - + Post Processing Jobs - - - +
        +
        +
        diff --git a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx index 08991c44b..c0159be65 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx @@ -70,12 +70,14 @@ function ConfirmationDialog({ return ( { setChecked([]); onClose(); }} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > {largeFiles.length === 0 && loading ? ( diff --git a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index cc684156b..b8dfa0251 100644 --- a/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -1,4 +1,4 @@ -import { Box, Checkbox, Flex, Typography } from '@neo4j-ndl/react'; +import { Checkbox, Flex, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; import { LargefilesProps } from '../../../types'; import { List, ListItem, ListItemAvatar, ListItemButton, ListItemIcon, ListItemText } from '@mui/material'; @@ -30,16 +30,16 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke [colorMode] ); return ( - - +
        +
        alert icon - +
        Large Document Notice - + One or more of your selected documents are large and may take extra time to process. Please review the estimated times below @@ -51,7 +51,7 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke { if (e.target.checked) { handleToggle(true, f.id); @@ -59,8 +59,8 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke handleToggle(false, f.id); } }} - checked={checked.indexOf(f.id) !== -1} - tabIndex={-1} + isChecked={checked.indexOf(f.id) !== -1} + htmlAttributes={{ tabIndex: -1 }} /> @@ -96,9 +96,9 @@ const LargeFilesAlert: FC = ({ largeFiles, handleToggle, checke ); })} - - - +
        +
        +
        ); }; export default LargeFilesAlert; diff --git a/frontend/src/components/Popups/RetryConfirmation/Index.tsx b/frontend/src/components/Popups/RetryConfirmation/Index.tsx index 6ebb02e33..c6345a956 100644 --- a/frontend/src/components/Popups/RetryConfirmation/Index.tsx +++ b/frontend/src/components/Popups/RetryConfirmation/Index.tsx @@ -27,11 +27,15 @@ function RetryConfirmationDialog({ const file = filesData.find((c) => c.id === fileId); const RetryOptionsForFile = file?.status != 'Completed' ? RETRY_OPIONS : RETRY_OPIONS.slice(0, 2); return ( - + Reprocess Options + + Clicking "Continue" will mark these files as "Ready to Reprocess." To proceed, click “Generate Graph” to start + the reprocessing process. + {alertStatus.showAlert && ( - + {alertStatus.alertMessage} )} @@ -47,17 +51,19 @@ function RetryConfirmationDialog({ }); }); }} - name='retryoptions' - checked={o === file?.retryOption && file?.retryOptionStatus} + htmlAttributes={{ + name: 'retryoptions', + onKeyDown: (e) => { + if (e.code === 'Enter' && file?.retryOption.length) { + retryHandler(file?.name as string, file?.retryOption as string); + } + }, + }} + isChecked={o === file?.retryOption && file?.retryOptionStatus} label={o .split('_') .map((s) => capitalize(s)) .join(' ')} - onKeyDown={(e) => { - if (e.code === 'Enter' && file?.retryOption.length) { - retryHandler(file?.name as string, file?.retryOption as string); - } - }} /> ); })} @@ -65,9 +71,11 @@ function RetryConfirmationDialog({ { diff --git a/frontend/src/components/Popups/Settings/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx index 1b9136198..f59ec86bf 100644 --- a/frontend/src/components/Popups/Settings/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -1,4 +1,4 @@ -import { Checkbox, Dialog, Textarea } from '@neo4j-ndl/react'; +import { Checkbox, Dialog, TextArea } from '@neo4j-ndl/react'; import { useCallback, useState } from 'react'; import { 
getNodeLabelsAndRelTypesFromText } from '../../../services/SchemaFromTextAPI'; import { useCredentials } from '../../../context/UserCredentials'; @@ -7,15 +7,7 @@ import { buttonCaptions } from '../../../utils/Constants'; import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; import { showNormalToast, showSuccessToast } from '../../../utils/toasts'; -const SchemaFromTextDialog = ({ - open, - onClose, - openSettingsDialog, -}: { - open: boolean; - onClose: () => void; - openSettingsDialog: () => void; -}) => { +const SchemaFromTextDialog = ({ open, onClose }: { open: boolean; onClose: () => void }) => { const [userText, setUserText] = useState(''); const [loading, setloading] = useState(false); const { setSelectedNodes, setSelectedRels } = useFileContext(); @@ -84,7 +76,6 @@ const SchemaFromTextDialog = ({ onClose(); setUserText(''); setIsSchema(false); - openSettingsDialog(); } catch (error) { setloading(false); console.log(error); @@ -94,26 +85,30 @@ const SchemaFromTextDialog = ({ return ( { setloading(false); setIsSchema(false); setUserText(''); onClose(); }} + htmlAttributes={{ + 'aria-labelledby': 'form-dialog-title', + }} > - Entity Graph Extraction Settings + Entity Graph Extraction Settings -