diff --git a/ai/gen-ai-agents/README.md b/ai/gen-ai-agents/README.md
index 3993671d2..2cb49fb56 100644
--- a/ai/gen-ai-agents/README.md
+++ b/ai/gen-ai-agents/README.md
@@ -18,6 +18,7 @@ Oracle’s Generative AI Agents is a fully managed service that combines the pow
 ## Reusable Assets Overview
 - [HCM agent created by partner Conneqtion Group which contains agents to connect to Fusion HCM, Expense and many others](https://www.youtube.com/watch?v=OhZcWx_H_tQ)
 - [Finance analytics agent created by our partner TPX impact](https://bit.ly/genai4analyst)
+- [Custom RAG agent, based on LangGraph](./custom-rag-agent)
 
 # Useful Links
 
@@ -38,3 +39,4 @@ Copyright (c) 2025 Oracle and/or its affiliates.
 
 Licensed under the Universal Permissive License (UPL), Version 1.0.
 See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.
+
diff --git a/ai/gen-ai-agents/custom-rag-agent/README.md b/ai/gen-ai-agents/custom-rag-agent/README.md
new file mode 100644
index 000000000..a4dc34c28
--- /dev/null
+++ b/ai/gen-ai-agents/custom-rag-agent/README.md
@@ -0,0 +1,49 @@
+![UI](images/ui_image.png)
+
+# Custom RAG agent
+This repository contains the code for the development of a **custom RAG agent**, based on **OCI Generative AI**, **Oracle 23AI** Vector Store, and **LangGraph**.
+
+**Author**: L. Saetta
+**Last updated**: 09/09/2025
+
+## Design and implementation
+* The agent is implemented using **LangGraph**
+* Vector Search is implemented on top of Oracle 23AI, using LangChain
+* A **reranker** can be used to refine the search results
+
+### Design decisions:
+* For every node of the graph there is a dedicated Python class (a **Runnable**, such as QueryRewriter)
+* The **reranker** is implemented using an LLM; as an alternative, it is easy to plug in, for example, the Cohere reranker
+* The agent is integrated with **OCI APM** for **Observability**; the integration uses **py-zipkin**
+* The UI is implemented using **Streamlit**
+* **Semantic Search** is also exposed as an [MCP server](./mcp_semantic_search_with_iam.py)
+
+### Streaming:
+* Events from the agent are streamed: as soon as a step is completed (Vector Search, Reranking, ...), the UI is updated.
+For example, links to the documentation chunks are displayed before the final answer is ready.
+* The final answer is streamed as well.
+
+### MCP support:
+(07/2025) I have added an implementation of an **MCP** server that exposes the Semantic Search feature.
+Security can be handled in two ways:
+* custom: generate the **JWT token** using the **PyJWT** library
+* **OCI**: generate the JWT token using **OCI IAM**
+
+## Status
+This project is **WIP** (work in progress).
+
+## References
+* [Integration with OCI APM](https://luigi-saetta.medium.com/enhancing-observability-in-rag-solutions-with-oracle-cloud-6f93b2675f40)
+
+## Advantages of the Agentic approach
+One of the primary advantages of the agentic approach is its **modularity**.
+Customer requirements often surpass the simplicity of typical Retrieval-Augmented Generation (RAG) demonstrations. Implementing a framework like **LangGraph** requires organizing the code into a modular sequence of steps, which facilitates the seamless integration of additional features at the appropriate places.
+
+For example, to ensure that final responses do not disclose Personally Identifiable Information (PII) present in the knowledge base, one can simply append a node at the end of the graph. This node would process the generated answer, detect any PII, and anonymize it accordingly.
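+
+A minimal sketch of this idea is shown below; `AgentState`, the node names, and the masking logic are illustrative placeholders, not the actual code of this repo:
+
+```python
+# Hypothetical sketch: appending a PII-anonymization node to a LangGraph graph.
+from typing import TypedDict
+
+from langgraph.graph import END, StateGraph
+
+
+class AgentState(TypedDict):
+    question: str
+    answer: str
+
+
+def generate_answer(state: AgentState) -> dict:
+    # Placeholder for the existing answer-generation step.
+    return {"answer": "the generated answer"}
+
+
+def anonymize_pii(state: AgentState) -> dict:
+    # Detect and mask PII in the generated answer
+    # (e.g., with an LLM call or a dedicated PII-detection library).
+    redacted = state["answer"].replace("John Doe", "[REDACTED]")  # placeholder logic
+    return {"answer": redacted}
+
+
+graph = StateGraph(AgentState)
+graph.add_node("generate", generate_answer)
+graph.add_node("anonymize", anonymize_pii)
+graph.set_entry_point("generate")
+graph.add_edge("generate", "anonymize")
+graph.add_edge("anonymize", END)
+app = graph.compile()
+```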
+
+## Configuration
+* Use Python 3.11
+* Install the dependencies listed in requirements.txt
+* Create your config_private.py using the template provided
+* For the MCP server: create a confidential application in OCI IAM
+
diff --git a/ai/gen-ai-agents/custom-rag-agent/assistant_ui_langgraph.py b/ai/gen-ai-agents/custom-rag-agent/assistant_ui_langgraph.py
index 3099ba225..7de5398b3 100644
--- a/ai/gen-ai-agents/custom-rag-agent/assistant_ui_langgraph.py
+++ b/ai/gen-ai-agents/custom-rag-agent/assistant_ui_langgraph.py
@@ -158,6 +158,9 @@ def register_feedback():
     "Select the Chat Model",
     config.MODEL_LIST,
 )
+
+st.sidebar.text_input(label="Embed Model", value=config.EMBED_MODEL_ID, disabled=True)
+
 st.session_state.enable_reranker = st.sidebar.checkbox(
     "Enable Reranker", value=True, disabled=False
 )
diff --git a/ai/gen-ai-agents/custom-rag-agent/config.py b/ai/gen-ai-agents/custom-rag-agent/config.py
index 8f8ed6622..ab2458535 100644
--- a/ai/gen-ai-agents/custom-rag-agent/config.py
+++ b/ai/gen-ai-agents/custom-rag-agent/config.py
@@ -33,8 +33,11 @@
 # added this to distinguish between Cohere end REST NVIDIA models
 # can be OCI or NVIDIA
 EMBED_MODEL_TYPE = "OCI"
+# EMBED_MODEL_TYPE = "NVIDIA"
 EMBED_MODEL_ID = "cohere.embed-multilingual-v3.0"
 # EMBED_MODEL_ID = "cohere.embed-multilingual-image-v3.0"
+
+# this model requires specifying the dimension; the default is 1536
 # EMBED_MODEL_ID = "cohere.embed-v4.0"
 
 # to support NVIDIA NIM
@@ -64,6 +67,7 @@
     "xai.grok-4",
     "openai.gpt-4.1",
     "openai.gpt-4o",
+    "openai.gpt-5",
     "meta.llama-3.3-70b-instruct",
     "cohere.command-a-03-2025",
 ]
@@ -71,6 +75,9 @@
 MODEL_LIST = [
     "meta.llama-3.3-70b-instruct",
     "cohere.command-a-03-2025",
+    "openai.gpt-4.1",
+    "openai.gpt-4o",
+    "openai.gpt-5",
 ]
 
 ENABLE_USER_FEEDBACK = True
@@ -78,7 +85,7 @@
 # semantic search
 TOP_K = 6
 # COLLECTION_LIST = ["BOOKS", "CNAF"]
-COLLECTION_LIST = ["BOOKS", "BOOKS2", "AMPLIFON", "AMPLIFON_EXT"]
+COLLECTION_LIST = ["BOOKS", "NVIDIA_BOOKS2"]
 DEFAULT_COLLECTION = "BOOKS"
diff --git a/ai/gen-ai-agents/custom-rag-agent/images/ui_image.png b/ai/gen-ai-agents/custom-rag-agent/images/ui_image.png
new file mode 100644
index 000000000..b8e0dd78a
Binary files /dev/null and b/ai/gen-ai-agents/custom-rag-agent/images/ui_image.png differ
diff --git a/ai/gen-ai-agents/custom-rag-agent/oci_models.py b/ai/gen-ai-agents/custom-rag-agent/oci_models.py
index d1a9ff602..d89249192 100644
--- a/ai/gen-ai-agents/custom-rag-agent/oci_models.py
+++ b/ai/gen-ai-agents/custom-rag-agent/oci_models.py
@@ -20,7 +20,7 @@
     This is a part of a demo showing how to implement an advanced
     RAG solution as a LangGraph agent.
 
-    modifiued to support xAI and OpenAI models through Langchain
+    modified to support xAI and OpenAI models through LangChain
 
 Warnings:
     This module is in development, may change in future versions.
@@ -50,7 +50,9 @@
 
 ALLOWED_EMBED_MODELS_TYPE = {"OCI", "NVIDIA"}
 
+# for gpt-5, since max_tokens is not supported
 MODELS_WITHOUT_KWARGS = {
+    "openai.gpt-5",
     "openai.gpt-4o-search-preview",
     "openai.gpt-4o-search-preview-2025-03-11",
 }
@@ -126,6 +128,8 @@ def get_embedding_model(model_type="OCI"):
             api_url=NVIDIA_EMBED_MODEL_URL, model=EMBED_MODEL_ID
         )
 
+    logger.info("Embedding model is: %s", EMBED_MODEL_ID)
+
     return embed_model
diff --git a/ai/gen-ai-agents/custom-rag-agent/transport.py b/ai/gen-ai-agents/custom-rag-agent/transport.py
new file mode 100644
index 000000000..5ce876408
--- /dev/null
+++ b/ai/gen-ai-agents/custom-rag-agent/transport.py
@@ -0,0 +1,81 @@
+"""
+File name: transport.py
+Author: Luigi Saetta
+Date last modified: 2025-03-31
+Python Version: 3.11
+
+Description:
+    This module provides the HTTP transport support for the integration with OCI APM.
+
+Usage:
+    Import this module into other scripts to use its functions.
+    Example:
+        ...
+
+
+License:
+    This code is released under the MIT License.
+
+Notes:
+    This is a part of a demo showing how to implement an advanced
+    RAG solution as a LangGraph agent.
+
+Warnings:
+    This module is in development, may change in future versions.
+"""
+
+import requests
+from utils import get_console_logger
+
+# changed to handle ENABLE_TRACING from UI
+import config
+from config_private import APM_PUBLIC_KEY
+
+
+logger = get_console_logger()
+
+
+def http_transport(encoded_span):
+    """
+    Sends encoded tracing data to OCI APM using py-zipkin.
+
+    Args:
+        encoded_span (bytes): The encoded span data to send.
+
+    Returns:
+        requests.Response or None: The response from the APM service or None if tracing is disabled.
+    """
+    try:
+        # Load config inside the function to avoid global dependency issues
+        base_url = config.APM_BASE_URL
+        content_type = config.APM_CONTENT_TYPE
+
+        # Validate configuration
+        if not base_url:
+            raise ValueError("APM base URL is not configured")
+        if not APM_PUBLIC_KEY:
+            raise ValueError("APM public key is missing")
+
+        # If tracing is disabled, do nothing
+        if not config.ENABLE_TRACING:
+            logger.info("Tracing is disabled. No data sent to APM.")
+            return None
+
+        # Construct the endpoint dynamically
+        apm_url = f"{base_url}/observations/public-span?dataFormat=zipkin&dataFormatVersion=2&dataKey={APM_PUBLIC_KEY}"
+
+        response = requests.post(
+            apm_url,
+            data=encoded_span,
+            headers={"Content-Type": content_type},
+            timeout=30,
+        )
+        response.raise_for_status()  # Raise an exception for HTTP errors
+
+        return response
+    except requests.RequestException as e:
+        logger.error("Failed to send span to APM: %s", str(e))
+        return None
+    except Exception as e:
+        logger.error("Unexpected error in http_transport: %s", str(e))
+        return None
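+
+# ---------------------------------------------------------------------------
+# Hypothetical usage sketch (not part of the original module): how a
+# py-zipkin transport handler like this is typically wired in. The service
+# and span names below are illustrative assumptions.
+#
+# from py_zipkin.zipkin import zipkin_span
+#
+# from transport import http_transport
+#
+# with zipkin_span(
+#     service_name="custom-rag-agent",   # assumed service name
+#     span_name="semantic_search",       # assumed span name
+#     transport_handler=http_transport,  # sends the encoded span to OCI APM
+#     sample_rate=100.0,                 # percentage of requests to trace
+# ):
+#     pass  # the step to trace; the span is shipped when the block exits
+# ---------------------------------------------------------------------------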