Commit 29258d4

synch and some updates to custom rag agent

1 parent 6844b46 commit 29258d4

File tree

7 files changed: +148 −2 lines changed

ai/gen-ai-agents/README.md

Lines changed: 2 additions & 0 deletions
@@ -18,6 +18,7 @@ Oracle’s Generative AI Agents is a fully managed service that combines the pow
 ## Reusable Assets Overview
 - [HCM agent created by partner Conneqtion Group which contains agents to connect to Fusion HCM, Expense and many others](https://www.youtube.com/watch?v=OhZcWx_H_tQ)
 - [Finance analytics agent created by our partner TPX impact](https://bit.ly/genai4analyst)
+- [Custom RAG agent, based on LangGraph](./custom-rag-agent)

 # Useful Links

@@ -38,3 +39,4 @@ Copyright (c) 2025 Oracle and/or its affiliates.
 Licensed under the Universal Permissive License (UPL), Version 1.0.

 See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.
+
Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
![UI](images/ui_image.png)

# Custom RAG agent

This repository contains the code for the development of a **custom RAG agent**, based on **OCI Generative AI**, the **Oracle 23AI** Vector Store, and **LangGraph**.

**Author**: L. Saetta
**Last updated**: 09/09/2025

## Design and implementation
* The agent is implemented using **LangGraph**.
* Vector Search is implemented, using LangChain, on top of Oracle 23AI.
* A **reranker** can be used to refine the search.

### Design decisions:
* Every node of the graph has a dedicated Python class (a **Runnable**, e.g. QueryRewriter).
* The **reranker** is implemented using an LLM. As an alternative, it is easy to plug in, for example, the Cohere reranker.
* The agent is integrated with **OCI APM** for **Observability**; the integration uses **py-zipkin**.
* The UI is implemented using **Streamlit**.
* **Semantic Search** is also exposed as an [MCP server](./mcp_semantic_search_with_iam.py).
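As a rough illustration of the node-per-class design above, a minimal LangGraph-style node might look like the following sketch. The class, state fields, and rewriting logic are illustrative stand-ins, not code from the repository:

```python
# Minimal sketch: one graph node per Python class, as described above.
# Class and field names are illustrative, not taken from the repository.
from typing import TypedDict


class State(TypedDict):
    question: str
    rewritten_question: str


class QueryRewriter:
    """Graph node: rewrites the user question for better retrieval."""

    def __init__(self, llm=None):
        self.llm = llm  # an LLM client could be injected here

    def __call__(self, state: State) -> dict:
        question = state["question"]
        # In the real agent an LLM call would rewrite the question;
        # here whitespace normalization acts as a stand-in.
        rewritten = " ".join(question.split())
        return {"rewritten_question": rewritten}


# Nodes like this are then registered on a LangGraph StateGraph, e.g.:
#   graph.add_node("rewrite", QueryRewriter(llm))
```

Keeping each step in its own callable class is what makes it easy to insert, swap, or remove steps later.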
### Streaming:
* Support for streaming events from the agent: as soon as a step is completed (Vector Search, Reranking, ...) the UI is updated. For example, links to the documentation chunks are displayed before the final answer is ready.
* Streaming of the final answer.
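The per-step streaming described above can be sketched as a generator that yields an event as soon as each step completes; the UI consumes events as they arrive. Step names and payloads below are illustrative stand-ins, not the agent's actual events:

```python
# Sketch of the streaming pattern: each agent step yields an event as soon
# as it completes, so the UI can update incrementally before the final answer.
# Step names and payloads are illustrative, not the repository's actual events.
from typing import Iterator, Tuple


def run_agent_steps(question: str) -> Iterator[Tuple[str, object]]:
    """Yield (step_name, payload) events as the agent progresses."""
    chunks = [f"chunk for: {question}"]   # stand-in for Vector Search
    yield ("vector_search", chunks)
    reranked = list(reversed(chunks))     # stand-in for Reranking
    yield ("rerank", reranked)
    yield ("answer", "final answer")      # stand-in for answer generation


# A Streamlit UI would consume the events roughly like this:
events = {name: payload for name, payload in run_agent_steps("What is RAG?")}
```

With LangGraph, the equivalent events would come from the compiled graph's streaming interface rather than a hand-written generator.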
### MCP support:
(07/2025) I have added an implementation of an **MCP** server that exposes the Semantic Search feature. Security can be handled in two ways:
* custom: generate the **JWT token** using the **PyJWT** library
* **OCI**: generate the JWT token using **OCI IAM**
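A minimal sketch of the custom option with PyJWT follows. The secret, claims, and expiry are placeholders, not the values used by the server:

```python
# Sketch of the "custom" option: issuing and verifying a JWT with PyJWT.
# The secret, claims, and TTL are illustrative placeholders.
import time

import jwt  # PyJWT

SECRET = "replace-with-a-real-secret"


def issue_token(subject: str, ttl_seconds: int = 3600) -> str:
    """Issue a signed token for an MCP client."""
    now = int(time.time())
    claims = {"sub": subject, "iat": now, "exp": now + ttl_seconds}
    return jwt.encode(claims, SECRET, algorithm="HS256")


def verify_token(token: str) -> dict:
    """Return the claims; raises jwt.InvalidTokenError if invalid or expired."""
    return jwt.decode(token, SECRET, algorithms=["HS256"])


token = issue_token("mcp-client")
claims = verify_token(token)
```

The OCI IAM option replaces `issue_token` with a token request to the identity domain, but verification on the server side follows the same decode-and-validate shape.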
## Status
It is **WIP**.

## References
* [Integration with OCI APM](https://luigi-saetta.medium.com/enhancing-observability-in-rag-solutions-with-oracle-cloud-6f93b2675f40)

## Advantages of the Agentic approach
One of the primary advantages of the agentic approach is its **modularity**.
Customer requirements often surpass the simplicity of typical Retrieval-Augmented Generation (RAG) demonstrations. Implementing a framework like **LangGraph** necessitates organizing code into a modular sequence of steps, facilitating the seamless integration of additional features at appropriate places.

For example, to ensure that final responses do not disclose Personally Identifiable Information (PII) present in the knowledge base, one can simply append a node at the end of the graph. This node would process the generated answer, detect any PII, and anonymize it accordingly.
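A hypothetical version of such a PII node, using simple regular expressions: the patterns and state keys are illustrative only, and a real implementation would use a proper PII detector rather than two regexes:

```python
# Sketch of a PII-anonymization node appended at the end of the graph.
# Patterns and state keys are illustrative, not the repository's code.
import re

PII_PATTERNS = {
    "email": re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+"),
    "phone": re.compile(r"\+?\d[\d\s-]{7,}\d"),
}


class AnonymizePII:
    """Final graph node: masks PII in the generated answer."""

    def __call__(self, state: dict) -> dict:
        answer = state["answer"]
        for label, pattern in PII_PATTERNS.items():
            answer = pattern.sub(f"[{label} removed]", answer)
        return {"answer": answer}


node = AnonymizePII()
out = node({"answer": "Contact john.doe@example.com for details."})
# out["answer"] == "Contact [email removed] for details."
```

Because the node only reads and rewrites the `answer` field of the state, adding it does not disturb the rest of the graph.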
## Configuration
* Use Python 3.11.
* Install the packages listed in requirements.txt.
* Create your config_private.py using the template provided.
* For the MCP server: create a confidential application in OCI IAM.

ai/gen-ai-agents/custom-rag-agent/assistant_ui_langgraph.py

Lines changed: 3 additions & 0 deletions
@@ -158,6 +158,9 @@ def register_feedback():
     "Select the Chat Model",
     config.MODEL_LIST,
 )
+
+st.sidebar.text_input(label="Embed Model", value=config.EMBED_MODEL_ID, disabled=True)
+
 st.session_state.enable_reranker = st.sidebar.checkbox(
     "Enable Reranker", value=True, disabled=False
 )

ai/gen-ai-agents/custom-rag-agent/config.py

Lines changed: 8 additions & 1 deletion
@@ -33,8 +33,11 @@
 # added this to distinguish between Cohere and REST NVIDIA models
 # can be OCI or NVIDIA
 EMBED_MODEL_TYPE = "OCI"
+# EMBED_MODEL_TYPE = "NVIDIA"
 EMBED_MODEL_ID = "cohere.embed-multilingual-v3.0"
 # EMBED_MODEL_ID = "cohere.embed-multilingual-image-v3.0"
+
+# this one needs to specify the dimension, default is 1536
 # EMBED_MODEL_ID = "cohere.embed-v4.0"

 # to support NVIDIA NIM

@@ -64,21 +67,25 @@
     "xai.grok-4",
     "openai.gpt-4.1",
     "openai.gpt-4o",
+    "openai.gpt-5",
     "meta.llama-3.3-70b-instruct",
     "cohere.command-a-03-2025",
 ]
 else:
     MODEL_LIST = [
         "meta.llama-3.3-70b-instruct",
         "cohere.command-a-03-2025",
+        "openai.gpt-4.1",
+        "openai.gpt-4o",
+        "openai.gpt-5",
     ]

 ENABLE_USER_FEEDBACK = True

 # semantic search
 TOP_K = 6
 # COLLECTION_LIST = ["BOOKS", "CNAF"]
-COLLECTION_LIST = ["BOOKS", "BOOKS2", "AMPLIFON", "AMPLIFON_EXT"]
+COLLECTION_LIST = ["BOOKS", "NVIDIA_BOOKS2"]
 DEFAULT_COLLECTION = "BOOKS"

ai/gen-ai-agents/custom-rag-agent/oci_models.py

Lines changed: 5 additions & 1 deletion
@@ -20,7 +20,7 @@
 This is a part of a demo showing how to implement an advanced
 RAG solution as a LangGraph agent.

-modifiued to support xAI and OpenAI models through Langchain
+modified to support xAI and OpenAI models through Langchain

 Warnings:
 This module is in development, may change in future versions.

@@ -50,7 +50,9 @@
 ALLOWED_EMBED_MODELS_TYPE = {"OCI", "NVIDIA"}

+# for gpt5, since max tokens is not supported
 MODELS_WITHOUT_KWARGS = {
+    "openai.gpt-5",
     "openai.gpt-4o-search-preview",
     "openai.gpt-4o-search-preview-2025-03-11",
 }

@@ -126,6 +128,8 @@ def get_embedding_model(model_type="OCI"):
         api_url=NVIDIA_EMBED_MODEL_URL, model=EMBED_MODEL_ID
     )

+    logger.info("Embedding model is: %s", EMBED_MODEL_ID)
+
     return embed_model
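As a sketch of how a set like `MODELS_WITHOUT_KWARGS` can be used, generation kwargs can be gated per model, since some models (such as gpt-5 in this commit) reject `max_tokens`. The helper name and default values below are illustrative, not the module's actual code:

```python
# Sketch: gate generation kwargs on a per-model basis. Some models reject
# common kwargs such as max_tokens, so they get an empty kwargs dict.
# Helper name and defaults are illustrative, not the repository's code.
MODELS_WITHOUT_KWARGS = {
    "openai.gpt-5",
    "openai.gpt-4o-search-preview",
    "openai.gpt-4o-search-preview-2025-03-11",
}


def build_model_kwargs(model_id: str, temperature: float = 0.1,
                       max_tokens: int = 1024) -> dict:
    """Return kwargs for the chat model, omitting those some models reject."""
    if model_id in MODELS_WITHOUT_KWARGS:
        return {}
    return {"temperature": temperature, "max_tokens": max_tokens}


build_model_kwargs("openai.gpt-5")   # -> {}
build_model_kwargs("openai.gpt-4o")  # -> {"temperature": 0.1, "max_tokens": 1024}
```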

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
"""
File name: transport.py
Author: Luigi Saetta
Date last modified: 2025-03-31
Python Version: 3.11

Description:
    This module provides the HTTP transport support for the integration with OCI APM.

Usage:
    Import this module into other scripts to use its functions.
    Example:
        ...

License:
    This code is released under the MIT License.

Notes:
    This is a part of a demo showing how to implement an advanced
    RAG solution as a LangGraph agent.

Warnings:
    This module is in development, may change in future versions.
"""

import requests

from utils import get_console_logger

# changed to handle ENABLE_TRACING from UI
import config
from config_private import APM_PUBLIC_KEY


logger = get_console_logger()


def http_transport(encoded_span):
    """
    Sends encoded tracing data to OCI APM using py-zipkin.

    Args:
        encoded_span (bytes): The encoded span data to send.

    Returns:
        requests.Response or None: The response from the APM service
        or None if tracing is disabled.
    """
    try:
        # Load config inside the function to avoid global dependency issues
        base_url = config.APM_BASE_URL
        content_type = config.APM_CONTENT_TYPE

        # Validate configuration
        if not base_url:
            raise ValueError("APM base URL is not configured")
        if not APM_PUBLIC_KEY:
            raise ValueError("APM public key is missing")

        # If tracing is disabled, do nothing
        if not config.ENABLE_TRACING:
            logger.info("Tracing is disabled. No data sent to APM.")
            return None

        # Construct endpoint dynamically
        apm_url = (
            f"{base_url}/observations/public-span"
            f"?dataFormat=zipkin&dataFormatVersion=2&dataKey={APM_PUBLIC_KEY}"
        )

        response = requests.post(
            apm_url,
            data=encoded_span,
            headers={"Content-Type": content_type},
            timeout=30,
        )
        response.raise_for_status()  # Raise exception for HTTP errors

        return response
    except requests.RequestException as e:
        logger.error("Failed to send span to APM: %s", str(e))
        return None
    except Exception as e:
        logger.error("Unexpected error in http_transport: %s", str(e))
        return None
