diff --git a/README.md b/README.md
index 464bf655..f5a57050 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,8 @@
 If you have a resource-constrained PC, try increasing `HEALTHCHECK_START_PERIOD`
 enough before healthcheck begins.
 For more information, please refer to this [link](https://docs.docker.com/reference/compose-file/services/#healthcheck)
+
+Set the model by updating your `.env` file:
 ```bash
 cd backend
 cp .env.example .env
@@ -60,9 +62,9 @@ make docker-down
 
 ### Prerequisites
 
-- [`uv`](https://docs.astral.sh/uv/) (for managing Python, virtual environments, and dependencies)
-- `wget`
-- `pandoc`
+- [`uv`](https://docs.astral.sh/uv/) (for managing Python, virtual environments, and dependencies)
+- `wget`
+- `pandoc`
 - `git`
 
 **Step 1**: Install the required dependencies.
@@ -141,12 +143,12 @@ flowchart LR
 
     id1([Vectorstore]) --- id3([MMR Retriever])
     id1([Vectorstore]) --- id4([BM25 Retriever])
-    id2([Semantic Retriever]) -- Retrieved Docs ---> id5([Reranking])
+    id2([Semantic Retriever]) -- Retrieved Docs ---> id5([Reranking])
     id3([MMR Retriever]) -- Retrieved Docs ---> id5([Reranking])
     id4([BM25 Retriever]) -- Retrieved Docs ---> id5([Reranking])
     id5([Reranking]) ---> id6(top-n docs)
 
-
+
 ```
 
 Depending on the input query, each query can be forwarded to any one of the following retrievers,
diff --git a/backend/src/api/routers/chains.py b/backend/src/api/routers/chains.py
index e0e409e3..32d20808 100644
--- a/backend/src/api/routers/chains.py
+++ b/backend/src/api/routers/chains.py
@@ -54,12 +54,18 @@
         llm = ChatOllama(model=model_name, temperature=llm_temp)
     elif os.getenv("LLM_MODEL") == "gemini":
-        if os.getenv("GOOGLE_GEMINI") == "1_pro":
-            llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=llm_temp)
-        elif os.getenv("GOOGLE_GEMINI") == "1.5_flash":
-            llm = ChatVertexAI(model_name="gemini-1.5-flash", temperature=llm_temp)
-        elif os.getenv("GOOGLE_GEMINI") == "1.5_pro":
-            llm = ChatVertexAI(model_name="gemini-1.5-pro", temperature=llm_temp)
+        gemini_model = os.getenv("GOOGLE_GEMINI")
+        if gemini_model in {"1_pro", "1.5_flash", "1.5_pro"}:
+            raise ValueError(
+                f"The selected Gemini model '{gemini_model}' (version 1.0–1.5) is disabled. "
+                "Please upgrade to version 2.0 or higher (e.g., 2.0_flash, 2.5_pro)."
+            )
+        elif gemini_model == "2.0_flash":
+            llm = ChatVertexAI(model_name="gemini-2.0-flash", temperature=llm_temp)
+        elif gemini_model == "2.5_flash":
+            llm = ChatVertexAI(model_name="gemini-2.5-flash", temperature=llm_temp)
+        elif gemini_model == "2.5_pro":
+            llm = ChatVertexAI(model_name="gemini-2.5-pro", temperature=llm_temp)
         else:
             raise ValueError("GOOGLE_GEMINI environment variable not set to a valid value.")
diff --git a/backend/src/api/routers/graphs.py b/backend/src/api/routers/graphs.py
index d0284df6..4c443621 100644
--- a/backend/src/api/routers/graphs.py
+++ b/backend/src/api/routers/graphs.py
@@ -75,12 +75,18 @@
         llm = ChatOllama(model=model_name, temperature=llm_temp)
     elif os.getenv("LLM_MODEL") == "gemini":
-        if os.getenv("GOOGLE_GEMINI") == "1_pro":
-            llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=llm_temp)
-        elif os.getenv("GOOGLE_GEMINI") == "1.5_flash":
-            llm = ChatVertexAI(model_name="gemini-1.5-flash", temperature=llm_temp)
-        elif os.getenv("GOOGLE_GEMINI") == "1.5_pro":
-            llm = ChatVertexAI(model_name="gemini-1.5-pro", temperature=llm_temp)
+        gemini_model = os.getenv("GOOGLE_GEMINI")
+        if gemini_model in {"1_pro", "1.5_flash", "1.5_pro"}:
+            raise ValueError(
+                f"The selected Gemini model '{gemini_model}' (version 1.0–1.5) is disabled. "
+                "Please upgrade to version 2.0 or higher (e.g., 2.0_flash, 2.5_pro)."
+            )
+        elif gemini_model == "2.0_flash":
+            llm = ChatVertexAI(model_name="gemini-2.0-flash", temperature=llm_temp)
+        elif gemini_model == "2.5_flash":
+            llm = ChatVertexAI(model_name="gemini-2.5-flash", temperature=llm_temp)
+        elif gemini_model == "2.5_pro":
+            llm = ChatVertexAI(model_name="gemini-2.5-pro", temperature=llm_temp)
         else:
             raise ValueError("GOOGLE_GEMINI environment variable not set to a valid value.")