Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
768db1c
refactor: extract prompts to dedicated module for improved maintainab…
Aug 28, 2025
7b02cea
refactor: extract text preprocessing to text_utils module
Aug 28, 2025
bbd3ad8
build: added langgraph to requirements and fixed dependency installat…
Aug 28, 2025
a158087
refactor: moved configuration settings to a separate config file and …
Aug 28, 2025
313dc60
chore: add env.example in /functions for required env vars for chat m…
Aug 28, 2025
6ea703f
Merge branch 'stg' into refactor/215-rag-from-langchain-to-langgraph
Sep 20, 2025
58dcd51
feat: added InMemoryCacheWithMaxsize and generate_cache_key for LRU c…
Sep 21, 2025
4a1a5bf
feat: defined ChatbotState TypedDict with messages, top_schemes_text,…
Sep 21, 2025
6992bfc
feat: added FirestoreChatSaver for session-based Firestore checkpointing
Sep 21, 2025
c6d24b2
chore: moved logging to utils logging setup
Sep 21, 2025
cac3d6a
refactor: modified entire implementation from langchain to langgraph
Sep 21, 2025
4ceab28
refactor: used setup_logging factory function
Sep 21, 2025
0109ac2
fix: updated system prompt's tone to be more helpful & empathetic wit…
Sep 21, 2025
6a61e92
docs: added docstring
Sep 21, 2025
719f749
test: refactor test suites for LangGraph-based chatbot architecture
Sep 21, 2025
cb57e38
fix: added handling for empty convos
Sep 21, 2025
f6652b5
chore: cleaned up imports
Sep 21, 2025
0869bc8
docs: added changelog to describe what was done in this refactoring
Sep 21, 2025
f91c738
refactor: replace json.dumps with safe_json_dumps for response handling
longwind48 Sep 21, 2025
40663d6
Merge pull request #224 from bettersg/fix/serialize_timestamp_error
yevkim Sep 21, 2025
6247758
feat(json): add Firestore JSON utilities for data serialization
longwind48 Sep 21, 2025
b3a5a1d
Merge pull request #225 from bettersg/fix/serialize_timestamp_error
longwind48 Sep 21, 2025
588a3f4
chore: resolved merge conflicts in chat.py in importing json_utils
Sep 22, 2025
e74365f
refactor: used setup_logging factory function
Sep 22, 2025
b5ba541
Merge pull request #223 from bettersg/refactor/215-rag-from-langchain…
longwind48 Sep 22, 2025
9850fcc
refactor: migrate local transformer embeddings to Azure OpenAI
celestlee Sep 27, 2025
4dd3154
fix: handle empty output from embeddings pipeline
celestlee Sep 29, 2025
644f187
feat: switch vector store from faiss to chroma
wtlow003 Oct 1, 2025
ac097a3
fix: update README for artefact placement
wtlow003 Oct 2, 2025
eb29a7b
chore: docstrings
wtlow003 Oct 2, 2025
17c856f
chore: docstrings
wtlow003 Oct 2, 2025
f4c7afc
Removed query-generator and added query-prompts to homepage
kairayzo Oct 5, 2025
c012b59
fix(backend): update unit test for new search model
wtlow003 Oct 9, 2025
f086cae
Merge pull request #226 from bettersg/refactor/216_migrate_to_api_embed
yevkim Oct 9, 2025
9e6de55
Merge branch 'stg' of github.com:bettersg/SchemesSG_v3 into feat/227-…
kairayzo Oct 9, 2025
b3a0251
fix(ci): remove debug flag
wtlow003 Oct 10, 2025
7d59373
fix(ci): remove debug flag
wtlow003 Oct 10, 2025
8ba7b85
Merge pull request #230 from bettersg/chore/fix-action-vulnerability
longwind48 Oct 10, 2025
ed7142a
Removed default first bot response and minor bug fixes
kairayzo Oct 13, 2025
e97620b
Merge pull request #231 from bettersg/feat/227-add-query-prompts
yevkim Oct 13, 2025
f8d4311
fix(backend): resolve pagination issue and update unit test for new s…
wtlow003 Oct 13, 2025
8490d4d
Merge pull request #232 from bettersg/fix/pagination-issues
longwind48 Oct 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/deploy_functions_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ jobs:
working-directory: backend
env:
GOOGLE_APPLICATION_CREDENTIALS: ${{ runner.temp }}/gcloud.json
run: npx firebase-tools deploy --only functions --project schemessg-v3-dev --debug --token "${{ secrets.FIREBASE_TOKEN_DEV }}"
run: npx firebase-tools deploy --only functions --project schemessg-v3-dev --token "${{ secrets.FIREBASE_TOKEN_DEV }}"
2 changes: 1 addition & 1 deletion .github/workflows/deploy_functions_prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ jobs:
env:
GOOGLE_APPLICATION_CREDENTIALS: ${{ runner.temp }}/gcloud.json
ENVIRONMENT: prod # Explicitly set production environment
run: npx firebase-tools deploy --only functions --project schemessg --debug --token "${{ secrets.FIREBASE_TOKEN_PROD }}"
run: npx firebase-tools deploy --only functions --project schemessg --token "${{ secrets.FIREBASE_TOKEN_PROD }}"
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ tags
# ruff
.ruff_cache


# ml files
backend/.env
backend/ml_logic/schemesv2-torch-allmpp-model
backend/ml_logic/schemesv2-torch-allmpp-model/config.json
Expand All @@ -181,6 +181,7 @@ backend/ml_logic/schemesv2-torch-allmpp-tokenizer/vocab.txt
backend/ml_logic/schemesv2-your_embeddings.npy
backend/ml_logic/schemesv2-your_index.faiss
backend/functions/ml_logic/index_to_scheme_id.json
backend/functions/ml_logic/vector_store

__pycache__/
.env
Expand All @@ -198,6 +199,7 @@ out/
.env.test.local
.env.production.local
.env.prod
!.env.example

# npm debug logs
npm-debug.log*
Expand Down
7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,7 @@ Ensure you have the following installed:
1. **Environment Variables and Model Files**
Download the following required files from Google Drive (contact maintainers for access):
- `.env` file → place in `backend/functions/`
- `schemesv2-torch-allmpp-model/` → place in `backend/functions/ml_logic/`
- `schemesv2-torch-allmpp-tokenizer/` → place in `backend/functions/ml_logic/`
- Required `.npy` files → place in `backend/functions/ml_logic/`
- Required `.faiss` files → place in `backend/functions/ml_logic/`

Alternatively, you can build the model files yourself using `model-creation-transformer-faiss.ipynb`
- Required `vector_store/` → place in `backend/functions/ml_logic/`

Note: The `.env` file contains sensitive configuration for Azure OpenAI services and should never be committed to version control.

Expand Down
2 changes: 2 additions & 0 deletions backend/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,7 @@ node_modules/

# credentials
creds.*

# test files
*.sh
jwt_token.txt
17 changes: 17 additions & 0 deletions backend/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
### Changelog

| No. | Previous | New | Comments |
|-----|----------|-----|----------|
| 1 | `RunnableWithMessageHistory` used in `Chatbot.initialise_graph` (`ml_logic/chatbotManager.py`) | Replaced with LangGraph compiled graph. Chat model initialization now uses `init_chat_model` instead of `AzureOpenAI` model. | - |
| 2 | Prompt & inputs, manually created `MessageHistory` | Defined `ChatbotState` (in `ml_logic/states.py`) to be passed in the graph. | Initially considered using `RunnableContext` for dynamic inputs like searches, but LangGraph caching requires search results to be modeled as nodes. Since caching is not a node, `ChatbotState` is used instead. |
| 3 | `InMemoryCache` | Custom wrapper around LangGraph’s `InMemoryCache` with LRU implementation for backward compatibility (`ml_logic/cache.py`). | Cache is initialized in `__init__`. Consider moving to `initialise()` to align more closely with singleton lifecycle management. |
| 4 | `Chatbot.get_session_history` | Implementation moved to `FirestoreChatSaver.get_session_history`. Introduced `Checkpoint` class to store more metadata for LangGraph checkpoint compatibility (`ml_logic/firestore_saver.py`). | Additional metadata is stored for compatibility and potential future use when more nodes are introduced. |
| 5 | Chain `stream` and `invoke` | Graph `stream` and `invoke` | Streaming requires special handling when caching is enabled. An additional cache check and token replay step simulate streaming for cached responses. |
| 6 | Old prompt | Updated prompt with tone, explicit request for questions, and inclusion of user’s first `query_text` into context. Prompts moved from `ml_logic/chatbotManager.py` to `ml_logic/prompts.py`. | `query_text` was not included previously, which could lead to irrelevant responses. Moving prompts to a dedicated module also separates concerns more cleanly. |
| 7 | - | Cleanup: text preprocessing moved to `ml_logic/text_utils.py`, logger setup to `utils/logging_setup.py`, and LLM configs to `ml_logic/config.py`. | - |

---

### Next Steps
- **Firestore saver**: Current implementation is a backward-compatibility measure. For agentic design patterns, saver needs to be redesigned with well-defined collections (possibly namespaces).
- **Async support**: Async code in tests was removed. If async functionality is required, methods can be updated to use `ainvoke` / `astream`, with corresponding test refactoring.
24 changes: 24 additions & 0 deletions backend/functions/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Docker details
GAR_IMAGE=
GAR_MEMORY=

# Specific env vars for AzureOpenAI model
AZURE_OPENAI_API_KEY=
AZURE_OPENAI_ENDPOINT=
OPENAI_API_VERSION=
AZURE_OPENAI_DEPLOYMENT_NAME=

# Firebase Configuration
FB_TYPE=
FB_PROJECT_ID=
FB_PRIVATE_KEY_ID=
FB_PRIVATE_KEY=
FB_CLIENT_ID=
FB_AUTH_URI=
FB_TOKEN_URI=
FB_AUTH_PROVIDER_X509_CERT_URL=
FB_CLIENT_X509_CERT_URL=
FB_UNIVERSE_DOMAIN=

# Firebase Web API Key
FB_API_KEY=
23 changes: 9 additions & 14 deletions backend/functions/chat/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,14 @@
from firebase_functions import https_fn, options
from loguru import logger
from ml_logic import Chatbot, dataframe_to_text
from utils.auth import verify_auth_token
from utils.cors_config import get_cors_headers, handle_cors_preflight
from utils.auth import verify_auth_token
from utils.json_utils import safe_json_dumps
from utils.logging_setup import setup_logging


# Remove default handler
logger.remove()

# Add custom handler with async writing
logger.add(
sys.stderr,
level="INFO", # Set to "DEBUG" in development
enqueue=True, # Enable async logging
backtrace=False, # Disable traceback for better performance
diagnose=False, # Disable diagnosis for better performance
)
logger = setup_logging()


def create_chatbot():
Expand Down Expand Up @@ -109,7 +102,7 @@ def chat_message(req: https_fn.Request) -> https_fn.Response:
except Exception as e:
logger.exception("Unable to fetch user query from firestore", e)
return https_fn.Response(
response=json.dumps({"error": "Internal server error, unable to fetch user query from firestore"}),
response=safe_json_dumps({"error": "Internal server error, unable to fetch user query from firestore"}),
status=500,
mimetype="application/json",
headers=headers,
Expand Down Expand Up @@ -185,10 +178,12 @@ def generate():
except Exception as e:
logger.exception("Error with chatbot", e)
return https_fn.Response(
response=json.dumps({"error": "Internal server error"}),
response=safe_json_dumps({"error": "Internal server error"}),
status=500,
mimetype="application/json",
headers=headers,
)

return https_fn.Response(response=json.dumps(results), status=200, mimetype="application/json", headers=headers)
return https_fn.Response(
response=safe_json_dumps(results), status=200, mimetype="application/json", headers=headers
)
7 changes: 5 additions & 2 deletions backend/functions/ml_logic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from .chatbotManager import Chatbot, dataframe_to_text
from .searchModelManager import PredictParams, PaginatedSearchParams, SearchModel
from .cache import InMemoryCacheWithMaxsize, generate_cache_key
from .chatbotManager import Chatbot
from .firestore_saver import FirestoreChatSaver
from .searchModelManager import PaginatedSearchParams, PredictParams, SearchModel
from .text_utils import dataframe_to_text
91 changes: 91 additions & 0 deletions backend/functions/ml_logic/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Custom cache implementation with maxsize enforcement."""

import datetime
import hashlib
from collections import OrderedDict
from typing import Mapping, Sequence

from langgraph.cache.base import FullKey, Namespace, ValueT
from langgraph.cache.memory import InMemoryCache
from langgraph.checkpoint.serde.base import SerializerProtocol
from utils.logging_setup import setup_logging

from .states import ChatbotState

logger = setup_logging()


class InMemoryCacheWithMaxsize(InMemoryCache):
    """In-memory cache with per-namespace LRU maxsize enforcement.

    Wraps LangGraph's ``InMemoryCache`` and bounds each namespace to at most
    ``maxsize`` entries. Kept for backwards compatibility with the previous
    LangChain-era bounded cache.

    Note: the original implementation evicted in pure insertion (FIFO) order —
    neither ``get`` hits nor ``set`` overwrites refreshed an entry's position
    in the ``OrderedDict`` — so "LRU" eviction was inaccurate. Both paths now
    call ``move_to_end`` so eviction is genuinely least-recently-used.
    """

    def __init__(self, *, serde: SerializerProtocol | None = None, maxsize: int = 1000):
        """Initialize the cache.

        Args:
            serde: Optional serializer; forwarded to ``InMemoryCache``.
            maxsize: Maximum number of entries kept per namespace.
        """
        super().__init__(serde=serde)
        self.maxsize = maxsize
        # Per-namespace OrderedDict of key -> (encoding, payload, expiry ts).
        # Order encodes recency: oldest (least recently used) entries first.
        self._cache: dict[Namespace, OrderedDict[str, tuple[str, bytes, float | None]]] = {}

    def get(self, keys: Sequence[FullKey]) -> dict[FullKey, ValueT]:
        """Get the cached values for the given keys.

        Expired entries are deleted on access; fresh hits are marked as
        most-recently-used so LRU eviction stays accurate.
        """
        with self._lock:
            if not keys:
                return {}
            now = datetime.datetime.now(datetime.timezone.utc).timestamp()
            values: dict[FullKey, ValueT] = {}
            for ns_tuple, key in keys:
                ns = Namespace(ns_tuple)
                if ns in self._cache and key in self._cache[ns]:
                    enc, val, expiry = self._cache[ns][key]
                    if expiry is None or now < expiry:
                        # Refresh recency on hit — required for true LRU.
                        self._cache[ns].move_to_end(key)
                        values[(ns, key)] = self.serde.loads_typed((enc, val))
                        # For backwards compatibility
                        logger.info(f"Cache hit for query combination (key: {key[:8]}...)")
                    else:
                        # Lazily drop expired entries.
                        del self._cache[ns][key]
            return values

    def set(self, keys: Mapping[FullKey, tuple[ValueT, int | None]]) -> None:
        """Set the cached values for the given keys.

        Entries with a TTL get an absolute expiry timestamp; each written key
        becomes most-recently-used, and the namespace is trimmed to maxsize.
        """
        with self._lock:
            now = datetime.datetime.now(datetime.timezone.utc)
            for (ns, key), (value, ttl) in keys.items():
                if ttl is not None:
                    delta = datetime.timedelta(seconds=ttl)
                    expiry: float | None = (now + delta).timestamp()
                else:
                    expiry = None
                if ns not in self._cache:
                    self._cache[ns] = OrderedDict()
                self._cache[ns][key] = (
                    *self.serde.dumps_typed(value),
                    expiry,
                )
                # Overwriting an existing key does not reorder an OrderedDict,
                # so explicitly mark it most-recently-used.
                self._cache[ns].move_to_end(key)
                self._enforce_namespace_maxsize(ns)

    def _enforce_namespace_maxsize(self, ns: Namespace):
        """Evict least-recently-used entries until the namespace fits maxsize."""
        # popitem(last=False) removes the oldest (least recently used) entry.
        while len(self._cache[ns]) > self.maxsize:
            self._cache[ns].popitem(last=False)


def generate_cache_key(state: ChatbotState) -> str:
    """Generate a deterministic cache key for the given chatbot state.

    Combines the original query text, the retrieved schemes text, and the
    content of the latest message, then hashes the combination with SHA-256
    so equal state produces the same key.

    Args:
        state (ChatbotState): The chatbot state to generate a cache key for.
            Requires `top_schemes_text`, `query_text`, and the last user message in `messages`.

    Returns:
        str: The generated cache key (hex-encoded SHA-256 digest).
    """
    messages = state["messages"]
    if isinstance(messages, list):
        # An empty conversation is keyed with an explicit placeholder.
        message_content = messages[-1].content if messages else "<empty>"
    else:
        # Single message object (or None / falsy → empty string).
        message_content = messages.content if messages else ""
    combined_text = f"{state['query_text']}:{state['top_schemes_text']}:{message_content}"
    return hashlib.sha256(combined_text.encode()).hexdigest()
Loading