diff --git a/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.1-ai-agent-tools-credit-decision-advisor-rai.ipynb b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.1-ai-agent-tools-credit-decision-advisor-rai.ipynb new file mode 100644 index 00000000..06381452 --- /dev/null +++ b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.1-ai-agent-tools-credit-decision-advisor-rai.ipynb @@ -0,0 +1,371 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b9f55760-d6ca-4e7f-8d21-f90f24347898", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "1. Define UC Functions to be used as AI Tools\n", + "- Predictor (we can use ai_query or query the scored output directly to improve the performance of the function and, in turn, the Agent)\n", + "- Explainability (we can directly write functions to run functionality for demo purposes and for lower latency - we can write simple sql query)\n", + "2. 
Reasoning agent built using AI Tools" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f5fb2bbf-bb2f-4553-91b0-5a5f9c032d63", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install unitycatalog-ai[databricks] unitycatalog-langchain[databricks] databricks-langchain" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "c590b992-cded-4122-b144-3160b1962ac8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "4492feea-345b-4e9c-96a8-2c1c755b67db", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%run ../_resources/00-setup $reset_all_data=false" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "435cb13f-ee48-4d94-b333-453443dc2af6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "CREATE FUNCTION IF NOT EXISTS pavithra_rao.credit_decisioning.credit_score(customer_id INT)\n", + "returns table(cust_id INT, prediction DOUBLE)\n", + "LANGUAGE SQL\n", + "COMMENT 'This function takes in a customer id and returns a table with prediction score if 
the customer will default or not'\n", + "return\n", + "(select cust_id, prediction from pavithra_rao.credit_decisioning.underbanked_prediction where cust_id = customer_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "3ada7ae1-3fb1-4916-a04c-0acbc6a970a9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "-- Test the function we just created\n", + "SELECT * from pavithra_rao.credit_decisioning.credit_score(5451)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "ad7cf09e-18b4-4b99-8f5e-c3a555ebc223", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "CREATE FUNCTION IF NOT EXISTS pavithra_rao.credit_decisioning.explain_model_shap(customer_id INT)\n", + "RETURNS TABLE (\n", + " index BIGINT,\n", + " cust_id INT,\n", + " education INT,\n", + " marital_status INT,\n", + " months_current_address INT,\n", + " months_employment INT,\n", + " is_resident INT,\n", + " tenure_months INT,\n", + " product_cnt INT,\n", + " tot_rel_bal DOUBLE,\n", + " revenue_tot DOUBLE,\n", + " revenue_12m DOUBLE,\n", + " income_annual INT,\n", + " tot_assets INT,\n", + " overdraft_balance_amount DOUBLE,\n", + " overdraft_number INT,\n", + " total_deposits_number INT,\n", + " total_deposits_amount DOUBLE,\n", + " total_equity_amount DOUBLE,\n", + " total_UT DOUBLE,\n", + " customer_revenue DOUBLE,\n", + " age INT,\n", + " avg_balance DOUBLE,\n", + " num_accs BIGINT,\n", + " balance_usd DOUBLE,\n", + " available_balance_usd DOUBLE,\n", + " is_pre_paid BIGINT,\n", + " 
number_payment_delays_last12mo BIGINT,\n", + " pct_increase_annual_number_of_delays_last_3_year BIGINT,\n", + " phone_bill_amt DOUBLE,\n", + " avg_phone_bill_amt_lst12mo DOUBLE,\n", + " dist_payer_cnt_12m BIGINT,\n", + " sent_txn_cnt_12m BIGINT,\n", + " sent_txn_amt_12m DOUBLE,\n", + " sent_amt_avg_12m DOUBLE,\n", + " dist_payee_cnt_12m BIGINT,\n", + " rcvd_txn_cnt_12m BIGINT,\n", + " rcvd_txn_amt_12m DOUBLE,\n", + " rcvd_amt_avg_12m DOUBLE,\n", + " dist_payer_cnt_6m BIGINT,\n", + " sent_txn_cnt_6m BIGINT,\n", + " sent_txn_amt_6m DOUBLE,\n", + " sent_amt_avg_6m DOUBLE,\n", + " dist_payee_cnt_6m BIGINT,\n", + " rcvd_txn_cnt_6m BIGINT,\n", + " rcvd_txn_amt_6m DOUBLE,\n", + " rcvd_amt_avg_6m DOUBLE,\n", + " dist_payer_cnt_3m BIGINT,\n", + " sent_txn_cnt_3m BIGINT,\n", + " sent_txn_amt_3m DOUBLE,\n", + " sent_amt_avg_3m DOUBLE,\n", + " dist_payee_cnt_3m BIGINT,\n", + " rcvd_txn_cnt_3m BIGINT,\n", + " rcvd_txn_amt_3m DOUBLE,\n", + " rcvd_amt_avg_3m DOUBLE,\n", + " tot_txn_cnt_12m BIGINT,\n", + " tot_txn_amt_12m DOUBLE,\n", + " tot_txn_cnt_6m BIGINT,\n", + " tot_txn_amt_6m DOUBLE,\n", + " tot_txn_cnt_3m BIGINT,\n", + " tot_txn_amt_3m DOUBLE,\n", + " ratio_txn_amt_3m_12m DOUBLE,\n", + " ratio_txn_amt_6m_12m DOUBLE,\n", + " gender STRING,\n", + " first_name STRING,\n", + " last_name STRING,\n", + " email STRING,\n", + " mobile_phone STRING,\n", + " AMT_CREDIT_MAX_OVERDUE BIGINT,\n", + " AMT_CREDIT_SUM BIGINT,\n", + " AMT_CREDIT_SUM_DEBT BIGINT,\n", + " AMT_CREDIT_SUM_LIMIT BIGINT,\n", + " AMT_CREDIT_SUM_OVERDUE BIGINT,\n", + " CNT_CREDIT_PROLONG BIGINT,\n", + " CREDIT_ACTIVE BIGINT,\n", + " DAYS_CREDIT BIGINT,\n", + " DAYS_CREDIT_ENDDATE BIGINT,\n", + " DAYS_ENDDATE_FACT BIGINT,\n", + " SK_BUREAU_ID BIGINT,\n", + " SK_ID_CURR BIGINT,\n", + " defaulted INT,\n", + " default_prob DOUBLE,\n", + " prediction INT,\n", + " education_shap DOUBLE,\n", + " marital_status_shap DOUBLE,\n", + " months_current_address_shap DOUBLE,\n", + " months_employment_shap DOUBLE,\n", + " 
is_resident_shap DOUBLE,\n", + " tenure_months_shap DOUBLE,\n", + " product_cnt_shap DOUBLE,\n", + " tot_rel_bal_shap DOUBLE,\n", + " revenue_tot_shap DOUBLE,\n", + " revenue_12m_shap DOUBLE,\n", + " income_annual_shap DOUBLE,\n", + " tot_assets_shap DOUBLE,\n", + " overdraft_balance_amount_shap DOUBLE,\n", + " overdraft_number_shap DOUBLE,\n", + " total_deposits_number_shap DOUBLE,\n", + " total_deposits_amount_shap DOUBLE,\n", + " total_equity_amount_shap DOUBLE,\n", + " total_UT_shap DOUBLE,\n", + " customer_revenue_shap DOUBLE,\n", + " age_shap DOUBLE,\n", + " avg_balance_shap DOUBLE,\n", + " num_accs_shap DOUBLE,\n", + " balance_usd_shap DOUBLE,\n", + " available_balance_usd_shap DOUBLE,\n", + " is_pre_paid_shap DOUBLE,\n", + " number_payment_delays_last12mo_shap DOUBLE,\n", + " pct_increase_annual_number_of_delays_last_3_year_shap DOUBLE,\n", + " phone_bill_amt_shap DOUBLE,\n", + " avg_phone_bill_amt_lst12mo_shap DOUBLE,\n", + " dist_payer_cnt_12m_shap DOUBLE,\n", + " sent_txn_cnt_12m_shap DOUBLE,\n", + " sent_txn_amt_12m_shap DOUBLE,\n", + " sent_amt_avg_12m_shap DOUBLE,\n", + " dist_payee_cnt_12m_shap DOUBLE,\n", + " rcvd_txn_cnt_12m_shap DOUBLE,\n", + " rcvd_txn_amt_12m_shap DOUBLE,\n", + " rcvd_amt_avg_12m_shap DOUBLE,\n", + " dist_payer_cnt_6m_shap DOUBLE,\n", + " sent_txn_cnt_6m_shap DOUBLE,\n", + " sent_txn_amt_6m_shap DOUBLE,\n", + " sent_amt_avg_6m_shap DOUBLE,\n", + " dist_payee_cnt_6m_shap DOUBLE,\n", + " rcvd_txn_cnt_6m_shap DOUBLE,\n", + " rcvd_txn_amt_6m_shap DOUBLE,\n", + " rcvd_amt_avg_6m_shap DOUBLE,\n", + " dist_payer_cnt_3m_shap DOUBLE,\n", + " sent_txn_cnt_3m_shap DOUBLE,\n", + " sent_txn_amt_3m_shap DOUBLE,\n", + " sent_amt_avg_3m_shap DOUBLE,\n", + " dist_payee_cnt_3m_shap DOUBLE,\n", + " rcvd_txn_cnt_3m_shap DOUBLE,\n", + " rcvd_txn_amt_3m_shap DOUBLE,\n", + " rcvd_amt_avg_3m_shap DOUBLE,\n", + " tot_txn_cnt_12m_shap DOUBLE,\n", + " tot_txn_amt_12m_shap DOUBLE,\n", + " tot_txn_cnt_6m_shap DOUBLE,\n", + " tot_txn_amt_6m_shap DOUBLE,\n", 
+ " tot_txn_cnt_3m_shap DOUBLE,\n", + " tot_txn_amt_3m_shap DOUBLE,\n", + " ratio_txn_amt_3m_12m_shap DOUBLE,\n", + " ratio_txn_amt_6m_12m_shap DOUBLE\n", + ")\n", + "LANGUAGE SQL\n", + "COMMENT 'This function takes in a customer id and returns all columns from the shap_explanation table for the given customer id'\n", + "RETURN\n", + "(SELECT * FROM pavithra_rao.credit_decisioning.shap_explanation WHERE cust_id = customer_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "2482dcc9-d93b-4788-a61a-0aae55570b42", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM pavithra_rao.credit_decisioning.explain_model_shap(5451)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": "HIGH" + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "2" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 2860937171370624, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2 + }, + "notebookName": "06.1-ai-agent-tools-credit-decision-advisor-rai", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.2-build-agent-credit-decision-advisor-rai.ipynb b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.2-build-agent-credit-decision-advisor-rai.ipynb new file mode 100644 index 00000000..430edcb6 --- /dev/null +++ b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 
06-Generative-AI/06.2-build-agent-credit-decision-advisor-rai.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b9f55760-d6ca-4e7f-8d21-f90f24347898", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "Build credit advisor agent using AI Tools/functions" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f5fb2bbf-bb2f-4553-91b0-5a5f9c032d63", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install unitycatalog-ai[databricks] unitycatalog-langchain[databricks] databricks-langchain" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "c590b992-cded-4122-b144-3160b1962ac8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "4492feea-345b-4e9c-96a8-2c1c755b67db", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%run ../_resources/00-setup $reset_all_data=false" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": 
"c99c5d53-819f-4a04-8f29-59d042d1ce0f", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "#Build a UCFunctionToolkit with two functions\n", + "from unitycatalog.ai.core.databricks import DatabricksFunctionClient\n", + "from databricks_langchain import UCFunctionToolkit\n", + "function_names = [\n", + " f\"{catalog}.{db}.explain_model_shap\",\n", + " f\"{catalog}.{db}.credit_score\"\n", + "]\n", + "toolkit = UCFunctionToolkit(function_names=function_names)\n", + "tools = toolkit.tools" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "56c5aa8e-1e8f-4cd8-b4c6-ac650e04647a", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Build the agent" + } + }, + "outputs": [], + "source": [ + "# Imports\n", + "from unitycatalog.ai.core.databricks import DatabricksFunctionClient\n", + "from databricks_langchain import UCFunctionToolkit, ChatDatabricks\n", + "from langchain.prompts import ChatPromptTemplate\n", + "from langchain.agents import AgentExecutor, create_tool_calling_agent\n", + "import mlflow\n", + "\n", + "# Initialize the LLM\n", + "LLM_ENDPOINT = \"databricks-claude-3-7-sonnet\" # or your preferred model\n", + "llm = ChatDatabricks(endpoint=LLM_ENDPOINT, temperature=0.0)\n", + "\n", + "# Define a system+human prompt template\n", + "prompt = ChatPromptTemplate.from_messages([\n", + "(\"system\", \"\"\"\n", + "You are a formal, exploratory Credit Risk Advisor for business users. \n", + "Whenever a customer ID is supplied, follow this exact sequence:\n", + "\n", + "1. **Credit Score:** \n", + " – Call `credit_score` to fetch the customer’s prediction and full feature record. \n", + " – Label it **Credit Score:** and summarize in business terms.\n", + "\n", + "2. 
**SHAP Explanation:** \n", + " – Call `explain_model_shap` to fetch the customer’s SHAP values. \n", + " – Label it **SHAP Explanation:** and summarize in business terms.\n", + " – For *each* of the top 5 contributing features, list: \n", + " • **Feature name** \n", + " • **SHAP value** (with sign and magnitude) \n", + " • **Business impact** (“This feature increased/decreased risk because…”) \n", + " • **Fairness note** (“This feature contributes more to risk for Group A than Group B, increasing bias by X%.”)\n", + "\n", + "3. As a final step, consolidate all the results and findings from the above step and summarize in business-friendly language what the score means and a detailed, explainable, and fairness-aware credit-risk analysis tailored to business users for credit risk management. Show the consolidated summary first, and then the details\n", + "\n", + "If no customer ID is provided, ask explicitly: \"A valid customer ID is required to retrieve credit risk insights. Please provide the customer ID.\"\n", + "\n", + "Always clearly label outputs, explain their meaning in straightforward business language, maintain a formal yet exploratory tone, and highlight potential compliance implications.\n", + "\"\"\"),\n", + " (\"human\", \"{input}\"),\n", + " (\"placeholder\", \"{agent_scratchpad}\")\n", + "])\n", + "\n", + "# Enable MLflow autologging for traceability\n", + "mlflow.langchain.autolog()\n", + "\n", + "# Create the agent wired with your tools and prompt\n", + "agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=prompt)\n", + "\n", + "# Wrap in an executor for easy programmatic use\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "86483a7a-7f7c-4570-8ac4-9fddad59ab08", + "showTitle": false, + 
"tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "#Example invocations:\n", + "print(agent_executor.invoke({\"input\": \"what is the default risk prediction for cust_id 5451. explain why the model predicted that value for cust_id 5451.\"}))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "dd521e5b-2bc0-466c-9b08-b84ab713f7ad", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": "HIGH" + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "2" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 58049841323135, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2 + }, + "notebookName": "06.2-build-agent-credit-decision-advisor-rai", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.3-deploy-agent-credit-decision-advisor-rai.py b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.3-deploy-agent-credit-decision-advisor-rai.py new file mode 100644 index 00000000..df1657ea --- /dev/null +++ b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/06.3-deploy-agent-credit-decision-advisor-rai.py @@ -0,0 +1,427 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC #Tool-calling Agent +# MAGIC +# MAGIC This is an auto-generated notebook created by an AI Playground export. 
+# MAGIC +# MAGIC This notebook uses [Mosaic AI Agent Framework](https://docs.databricks.com/generative-ai/agent-framework/build-genai-apps.html) to recreate your agent from the AI Playground. It demonstrates how to develop, manually test, evaluate, log, and deploy a tool-calling agent in LangGraph. +# MAGIC +# MAGIC The agent code implements [MLflow's ChatAgent](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#mlflow.pyfunc.ChatAgent) interface, a Databricks-recommended open-source standard that simplifies authoring multi-turn conversational agents, and is fully compatible with Mosaic AI agent framework functionality. +# MAGIC +# MAGIC **_NOTE:_** This notebook uses LangChain, but AI Agent Framework is compatible with any agent authoring framework, including LlamaIndex or pure Python agents written with the OpenAI SDK. + +# COMMAND ---------- + +# MAGIC %pip install -U -qqqq mlflow langchain langgraph==0.3.4 databricks-langchain pydantic databricks-agents unitycatalog-langchain[databricks] uv + +# COMMAND ---------- + +# MAGIC %pip install databricks-connect==16.3 + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- + +# MAGIC %md ## Define the agent in code +# MAGIC Below we define our agent code in a single cell, enabling us to easily write it to a local Python file for subsequent logging and deployment using the `%%writefile` magic command. +# MAGIC +# MAGIC For more examples of tools to add to your agent, see [docs](https://docs.databricks.com/generative-ai/agent-framework/agent-tool.html). 

# COMMAND ----------

# MAGIC %%writefile agent.py
# MAGIC from typing import Any, Generator, Optional, Sequence, Union
# MAGIC
# MAGIC import mlflow
# MAGIC from databricks_langchain import (
# MAGIC     ChatDatabricks,
# MAGIC     VectorSearchRetrieverTool,
# MAGIC     DatabricksFunctionClient,
# MAGIC     UCFunctionToolkit,
# MAGIC     set_uc_function_client,
# MAGIC )
# MAGIC from langchain_core.language_models import LanguageModelLike
# MAGIC from langchain_core.runnables import RunnableConfig, RunnableLambda
# MAGIC from langchain_core.tools import BaseTool
# MAGIC from langgraph.graph import END, StateGraph
# MAGIC from langgraph.graph.graph import CompiledGraph
# MAGIC from langgraph.graph.state import CompiledStateGraph
# MAGIC from langgraph.prebuilt.tool_node import ToolNode
# MAGIC from mlflow.langchain.chat_agent_langgraph import ChatAgentState, ChatAgentToolNode
# MAGIC from mlflow.pyfunc import ChatAgent
# MAGIC from mlflow.types.agent import (
# MAGIC     ChatAgentChunk,
# MAGIC     ChatAgentMessage,
# MAGIC     ChatAgentResponse,
# MAGIC     ChatContext,
# MAGIC )
# MAGIC
# MAGIC mlflow.langchain.autolog()
# MAGIC
# MAGIC client = DatabricksFunctionClient()
# MAGIC set_uc_function_client(client)
# MAGIC
# MAGIC ############################################
# MAGIC # Define your LLM endpoint and system prompt
# MAGIC ############################################
# MAGIC LLM_ENDPOINT_NAME = "databricks-claude-3-7-sonnet"
# MAGIC llm = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)
# MAGIC
# MAGIC # NOTE: this is sent verbatim as the system message (see create_tool_calling_agent),
# MAGIC # so it must contain prompt text only -- no LangChain template tuples or
# MAGIC # "{input}" / "{agent_scratchpad}" placeholders.
# MAGIC system_prompt = """You are a formal, exploratory Credit Risk Advisor for business users.
# MAGIC Whenever a customer ID is supplied, follow this exact sequence:
# MAGIC
# MAGIC 1. **Credit Score:**
# MAGIC    – Call `credit_score` to fetch the customer’s prediction and full feature record.
# MAGIC    – Label it **Credit Score:** and summarize in business terms.
# MAGIC
# MAGIC 2. **SHAP Explanation:**
# MAGIC    – Call `explain_model_shap` to fetch the customer’s SHAP values.
# MAGIC    – Label it **SHAP Explanation:** and summarize in business terms.
# MAGIC    – For *each* of the top 5 contributing features, list:
# MAGIC       • **Feature name**
# MAGIC       • **SHAP value** (with sign and magnitude)
# MAGIC       • **Business impact** (“This feature increased/decreased risk because…”)
# MAGIC       • **Fairness note** (“This feature contributes more to risk for defaulted or non-defaulted group, increasing bias by X%.”)
# MAGIC
# MAGIC 3. As a final step, consolidate all the results and findings from the above step and summarize in business-friendly language what the score means and a detailed, explainable, and fairness-aware credit-risk analysis tailored to business users for credit risk management. Show the consolidated summary first along with values, and then the details.
# MAGIC
# MAGIC If no customer ID is provided, ask explicitly: "A valid customer ID is required to retrieve credit risk insights. Please provide the customer ID."
# MAGIC
# MAGIC Always clearly label outputs, explain their meaning in straightforward business language, maintain a formal yet exploratory tone, and highlight potential compliance implications and how the business user should proceed in approving loans or can loan lenders offer the customer the choice to pay with a credit automatically, or refuse if the model believes the risk is too high and will likely result in a payment default based on the details and facts here."""
# MAGIC
# MAGIC ###############################################################################
# MAGIC ## Define tools for your agent, enabling it to retrieve data or take actions
# MAGIC ## beyond text generation
# MAGIC ## To create and see usage examples of more tools, see
# MAGIC ## https://docs.databricks.com/generative-ai/agent-framework/agent-tool.html
# MAGIC ###############################################################################
# MAGIC tools = []
# MAGIC
# MAGIC # You can use UDFs in Unity Catalog as agent tools.
# MAGIC # Only the two functions named in the system prompt are registered, instead of
# MAGIC # a schema-wide wildcard, so unrelated functions in the schema are never
# MAGIC # exposed to the model.
# MAGIC UC_SCHEMA = "pavithra_rao.credit_decisioning"
# MAGIC uc_tool_names = [
# MAGIC     f"{UC_SCHEMA}.credit_score",
# MAGIC     f"{UC_SCHEMA}.explain_model_shap",
# MAGIC ]
# MAGIC uc_toolkit = UCFunctionToolkit(function_names=uc_tool_names)
# MAGIC tools.extend(uc_toolkit.tools)
# MAGIC
# MAGIC
# MAGIC #####################
# MAGIC ## Define agent logic
# MAGIC #####################
# MAGIC
# MAGIC
# MAGIC def create_tool_calling_agent(
# MAGIC     model: LanguageModelLike,
# MAGIC     tools: Union[Sequence[BaseTool], ToolNode],
# MAGIC     system_prompt: Optional[str] = None,
# MAGIC ) -> CompiledGraph:
# MAGIC     """Compile a two-node LangGraph workflow that loops between the LLM and its tools.
# MAGIC
# MAGIC     The "agent" node invokes the tool-bound model; whenever the model's latest
# MAGIC     message carries tool calls the "tools" node runs them and control returns
# MAGIC     to "agent". The graph ends on the first model message without tool calls.
# MAGIC
# MAGIC     Args:
# MAGIC         model: Chat model; it is bound to ``tools`` via ``bind_tools``.
# MAGIC         tools: Tools made available to the model and executed by the tool node.
# MAGIC         system_prompt: Optional text prepended as a system message on every
# MAGIC             model call. When falsy, the state messages are passed through as-is.
# MAGIC
# MAGIC     Returns:
# MAGIC         The compiled workflow.
# MAGIC     """
# MAGIC     model = model.bind_tools(tools)
# MAGIC
# MAGIC     # Define the function that determines which node to go to
# MAGIC     def should_continue(state: ChatAgentState):
# MAGIC         messages = state["messages"]
# MAGIC         last_message = messages[-1]
# MAGIC         # If there are function calls, continue; otherwise end
# MAGIC         if last_message.get("tool_calls"):
# MAGIC             return "continue"
# MAGIC         else:
# MAGIC             return "end"
# MAGIC
# MAGIC     if system_prompt:
# MAGIC         preprocessor = RunnableLambda(
# MAGIC             lambda state: [{"role": "system", "content": system_prompt}]
# MAGIC             + state["messages"]
# MAGIC         )
# MAGIC     else:
# MAGIC         preprocessor = RunnableLambda(lambda state: state["messages"])
# MAGIC     model_runnable = preprocessor | model
# MAGIC
# MAGIC     def call_model(
# MAGIC         state: ChatAgentState,
# MAGIC         config: RunnableConfig,
# MAGIC     ):
# MAGIC         response = model_runnable.invoke(state, config)
# MAGIC
# MAGIC         return {"messages": [response]}
# MAGIC
# MAGIC     workflow = StateGraph(ChatAgentState)
# MAGIC
# MAGIC     workflow.add_node("agent", RunnableLambda(call_model))
# MAGIC     workflow.add_node("tools", ChatAgentToolNode(tools))
# MAGIC
# MAGIC     workflow.set_entry_point("agent")
# MAGIC     workflow.add_conditional_edges(
# MAGIC         "agent",
# MAGIC         should_continue,
# MAGIC         {
# MAGIC             "continue": "tools",
# MAGIC             "end": END,
# MAGIC         },
# MAGIC     )
# MAGIC     workflow.add_edge("tools", "agent")
# MAGIC
# MAGIC     return workflow.compile()
# MAGIC
# MAGIC
# MAGIC class LangGraphChatAgent(ChatAgent):
# MAGIC     """ChatAgent wrapper that serves a compiled LangGraph workflow."""
# MAGIC
# MAGIC     def __init__(self, agent: CompiledStateGraph):
# MAGIC         self.agent = agent
# MAGIC
# MAGIC     def predict(
# MAGIC         self,
# MAGIC         messages: list[ChatAgentMessage],
# MAGIC         context: Optional[ChatContext] = None,
# MAGIC         custom_inputs: Optional[dict[str, Any]] = None,
# MAGIC     ) -> ChatAgentResponse:
# MAGIC         """Run the graph to completion and return every message it produced.
# MAGIC
# MAGIC         ``context`` and ``custom_inputs`` are accepted for interface
# MAGIC         compatibility and are not used.
# MAGIC         """
# MAGIC         request = {"messages": self._convert_messages_to_dict(messages)}
# MAGIC
# MAGIC         response_messages = []
# MAGIC         for event in self.agent.stream(request, stream_mode="updates"):
# MAGIC             for node_data in event.values():
# MAGIC                 response_messages.extend(
# MAGIC                     ChatAgentMessage(**msg) for msg in node_data.get("messages", [])
# MAGIC                 )
# MAGIC         return ChatAgentResponse(messages=response_messages)
# MAGIC
# MAGIC     def predict_stream(
# MAGIC         self,
# MAGIC         messages: list[ChatAgentMessage],
# MAGIC         context: Optional[ChatContext] = None,
# MAGIC         custom_inputs: Optional[dict[str, Any]] = None,
# MAGIC     ) -> Generator[ChatAgentChunk, None, None]:
# MAGIC         """Yield one chunk per message as each graph node reports an update.
# MAGIC
# MAGIC         ``context`` and ``custom_inputs`` are accepted for interface
# MAGIC         compatibility and are not used.
# MAGIC         """
# MAGIC         request = {"messages": self._convert_messages_to_dict(messages)}
# MAGIC         for event in self.agent.stream(request, stream_mode="updates"):
# MAGIC             for node_data in event.values():
# MAGIC                 # Use .get like predict() does, so an update without a
# MAGIC                 # "messages" key is skipped instead of raising KeyError.
# MAGIC                 yield from (
# MAGIC                     ChatAgentChunk(**{"delta": msg})
# MAGIC                     for msg in node_data.get("messages", [])
# MAGIC                 )
# MAGIC
# MAGIC
# MAGIC # Create the agent object, and specify it as the agent object to use when
# MAGIC # loading the agent back for inference via mlflow.models.set_model()
# MAGIC agent = create_tool_calling_agent(llm, tools, system_prompt)
# MAGIC AGENT = LangGraphChatAgent(agent)
# MAGIC mlflow.models.set_model(AGENT)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Test the agent
# MAGIC
# MAGIC Interact with the agent to test its output. Since this notebook called `mlflow.langchain.autolog()` you can view the trace for each step the agent takes.
# MAGIC
# MAGIC Replace this placeholder input with an appropriate domain-specific example for your agent.

# COMMAND ----------

dbutils.library.restartPython()

# COMMAND ----------

# MAGIC %run ../_resources/00-setup $reset_all_data=false

# COMMAND ----------

from agent import AGENT

AGENT.predict({"messages": [{"role": "user", "content": "Hello!"}]})

# COMMAND ----------

import logging

# Suppress warnings and info
logging.getLogger("py4j").setLevel(logging.ERROR)

for event in AGENT.predict_stream(
    {"messages": [{"role": "user", "content": "what is the default risk prediction for cust_id 5451. explain why the model predicted that value for cust_id 5451"}]}
):
    print(event, "-----------\n")

# COMMAND ----------

# MAGIC %md
# MAGIC ### Log the `agent` as an MLflow model
# MAGIC
# MAGIC Log the agent as code from the `agent.py` file.
# MAGIC See [MLflow - Models from Code](https://mlflow.org/docs/latest/models.html#models-from-code).

# COMMAND ----------

# Collect the Databricks resources the agent depends on so they can be declared
# for automatic auth passthrough at deployment time.
import mlflow
from agent import tools, LLM_ENDPOINT_NAME
from databricks_langchain import VectorSearchRetrieverTool
from mlflow.models.resources import DatabricksFunction, DatabricksServingEndpoint
from unitycatalog.ai.langchain.toolkit import UnityCatalogTool

# TODO: Manually include underlying resources if needed.
# Start from the LLM serving endpoint, then add what each tool declares.
resources = [DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME)]
for agent_tool in tools:
    if isinstance(agent_tool, VectorSearchRetrieverTool):
        # Vector search tools expose their own list of resources.
        resources += agent_tool.resources
        continue
    if isinstance(agent_tool, UnityCatalogTool):
        # Unity Catalog tools are declared by their function name.
        resources.append(
            DatabricksFunction(function_name=agent_tool.uc_function_name)
        )

# Single-turn request stored with the model as its input example.
example_question = "what is the default risk prediction for cust_id 5451. explain why the model predicted that value for cust_id 5451."
input_example = {"messages": [{"role": "user", "content": example_question}]}

# Arguments for logging the agent from the agent.py file written above.
log_model_options = {
    "artifact_path": "agent",
    "python_model": "agent.py",
    "input_example": input_example,
    "resources": resources,
    "extra_pip_requirements": ["databricks-connect"],
}

with mlflow.start_run():
    logged_agent_info = mlflow.pyfunc.log_model(**log_model_options)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Evaluate the agent with [Agent Evaluation](https://docs.databricks.com/generative-ai/agent-evaluation/index.html)
# MAGIC
# MAGIC You can edit the requests or expected responses in your evaluation dataset and run evaluation as you iterate your agent, leveraging mlflow to track the computed quality metrics.
# MAGIC
# MAGIC To evaluate your tool calls, try adding [custom metrics](https://docs.databricks.com/generative-ai/agent-evaluation/custom-metrics.html#evaluating-tool-calls).
# COMMAND ----------

import pandas as pd

# One-row evaluation set: a chat request (system + user message) and a
# skeleton of the answer structure we expect the agent to produce.
# NOTE(review): the logged agent already prepends its own system prompt (see
# agent.py); confirm that sending a second system message here is intended.
eval_set = [
    {
        "request": {
            "messages": [
                {
                    "role": "system",
                    "content": """You are a formal, exploratory Credit Risk Advisor for business users.

Whenever a customer ID is supplied, follow this exact sequence:

1. **Credit Score:**
   – Call `credit_score` to fetch the customer’s prediction and full feature record.
   – Label it **Credit Score:** and summarize in business terms.

2. **SHAP Explanation:**
   – Call `explain_model_shap` to fetch the customer’s SHAP values.
   – Label it **SHAP Explanation:** and summarize in business terms.
   – For *each* of the top 5 contributing features, list:
     • **Feature name**
     • **SHAP value** (with sign and magnitude)
     • **Business impact** (“This feature increased/decreased risk because…”)
     • **Fairness note** (“This feature contributes more to risk for Group A than Group B, increasing bias by X%.”)

3. As a final step, consolidate all the results and findings from the above step and summarize in business-friendly language what the score means and a detailed, explainable, and fairness-aware credit-risk analysis tailored to business users for credit risk management. Show the consolidated summary first along with key values, and then the details.

If no customer ID is provided, ask explicitly: "A valid customer ID is required to retrieve credit risk insights. Please provide the customer ID."

Always clearly label outputs, explain their meaning in straightforward business language, maintain a formal yet exploratory tone, and highlight potential compliance implications and how the business user should proceed in approving loans or can loan lenders offer the customer the choice to pay with a credit automatically, or refuse if the model believes the risk is too high and will likely result in a payment default based on the details and facts here.""",
                },
                {
                    "role": "user",
                    "content": "what is the default risk prediction for cust_id 5451. explain why the model predicted that value for cust_id 5451.",
                },
            ]
        },
        "expected_response": """ # Credit Risk Analysis for Customer ID 5451

## Credit Score:
The customer has a prediction value of......

## SHAP Explanation:
SHAP (SHapley Additive exPlanations) values help us understand which features contributed most to this high-risk prediction. Here are the top 5 contributing factors:

1.

2.

3.

4.

5.

## Summary of Credit Risk Assessment: ....

Recommended next steps.""",
    }
]

eval_dataset = pd.DataFrame(eval_set)
display(eval_dataset)

# COMMAND ----------

import mlflow
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Attach the evaluation to the run that logged the agent so metrics and the
# model live side by side. The model is addressed through
# logged_agent_info.model_uri, the same URI used for registration below,
# rather than a hand-built "runs:/<run_id>/agent" string.
with mlflow.start_run(run_id=logged_agent_info.run_id):
    eval_results = mlflow.evaluate(
        model=logged_agent_info.model_uri,
        data=eval_dataset,  # Ensure eval_dataset is defined with the correct schema
        model_type="databricks-agent",  # Enable Mosaic AI Agent Evaluation
    )

# Review the evaluation results in the MLflow UI (see console output)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Perform pre-deployment validation of the agent
# MAGIC Before registering and deploying the agent, we perform pre-deployment checks via the [mlflow.models.predict()](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.predict) API. See [documentation](https://docs.databricks.com/machine-learning/model-serving/model-serving-debug.html#validate-inputs) for details

# COMMAND ----------

# Run one prediction in an isolated environment built from the logged
# requirements, to catch dependency problems before deployment.
mlflow.models.predict(
    model_uri=logged_agent_info.model_uri,
    input_data={
        "messages": [
            {
                "role": "user",
                "content": "what is the default risk prediction for cust_id 10548. explain why the model predicted that value for cust_id 10548",
            }
        ]
    },
    env_manager="uv",
)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Register the model to Unity Catalog
# MAGIC
# MAGIC Update the `catalog`, `schema`, and `model_name` below to register the MLflow model to Unity Catalog.

# COMMAND ----------

mlflow.set_registry_uri("databricks-uc")
# NOTE(review): these identifiers are hard-coded to one user's catalog; update
# them for your workspace as the markdown above instructs.
catalog = 'pavithra_rao'
dbName = 'credit_decisioning'
model_name = 'credit_decision_advisor'
UC_MODEL_NAME = f"{catalog}.{dbName}.{model_name}"

# register the model to UC
uc_registered_model_info = mlflow.register_model(
    model_uri=logged_agent_info.model_uri, name=UC_MODEL_NAME
)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Deploy the agent

# COMMAND ----------

from databricks import agents

# Deploy the version that was just registered; the tag records which LLM endpoint backs it.
agents.deploy(
    UC_MODEL_NAME,
    uc_registered_model_info.version,
    tags={"llm": LLM_ENDPOINT_NAME},
)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Next steps
# MAGIC
# MAGIC After your agent is deployed, you can chat with it in AI playground to perform additional checks, share it with SMEs in your organization for feedback, or embed it in a production application.
# MAGIC See [docs](https://docs.databricks.com/generative-ai/deploy-agent.html) for details

# diff --git a/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/agent.py b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/agent.py
# new file mode 100644
# index 00000000..5150e34f
# --- /dev/null
# +++ b/demo-FSI/lakehouse-fsi-credit-decisioning/(Clone) 06-Generative-AI/agent.py
# @@ -0,0 +1,172 @@
from typing import Any, Generator, Optional, Sequence, Union

import mlflow
from databricks_langchain import (
    ChatDatabricks,
    VectorSearchRetrieverTool,
    DatabricksFunctionClient,
    UCFunctionToolkit,
    set_uc_function_client,
)
from langchain_core.language_models import LanguageModelLike
from langchain_core.runnables import RunnableConfig, RunnableLambda
from langchain_core.tools import BaseTool
from langgraph.graph import END, StateGraph
from langgraph.graph.graph import CompiledGraph
from langgraph.graph.state import CompiledStateGraph
from langgraph.prebuilt.tool_node import ToolNode
from mlflow.langchain.chat_agent_langgraph import ChatAgentState, ChatAgentToolNode
from mlflow.pyfunc import ChatAgent
from mlflow.types.agent import (
    ChatAgentChunk,
    ChatAgentMessage,
    ChatAgentResponse,
    ChatContext,
)

mlflow.langchain.autolog()

client = DatabricksFunctionClient()
set_uc_function_client(client)

############################################
# Define your LLM endpoint and system prompt
############################################
LLM_ENDPOINT_NAME = "databricks-claude-3-7-sonnet"
llm = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)

# The prompt is sent verbatim as the system message. It previously ended with
# leftover `("human", "{input}")` / `("placeholder", "{agent_scratchpad}")`
# template-tuple text, which the model received as literal prompt content.
system_prompt = """You are a formal, exploratory Credit Risk Advisor for business users.
Whenever a customer ID is supplied, follow this exact sequence:

1. **Credit Score:**
   – Call `credit_score` to fetch the customer’s prediction and full feature record.
   – Label it **Credit Score:** and summarize in business terms.

2. **SHAP Explanation:**
   – Call `explain_model_shap` to fetch the customer’s SHAP values.
   – Label it **SHAP Explanation:** and summarize in business terms.
   – For *each* of the top 5 contributing features, list:
     • **Feature name**
     • **SHAP value** (with sign and magnitude)
     • **Business impact** (“This feature increased/decreased risk because…”)
     • **Fairness note** (“This feature contributes more to risk for defualted or non-defualted group, increasing bias by X%.”)

3. As a final step, consolidate all the results and findings from the above step and summarize in business-friendly language what the score means and a detailed, explainable, and fairness-aware credit-risk analysis tailored to business users for credit risk management. Show the consolidated summary first along with values,and then the details.

If no customer ID is provided, ask explicitly: "A valid customer ID is required to retrieve credit risk insights. Please provide the customer ID."

Always clearly label outputs, explain their meaning in straightforward business language, maintain a formal yet exploratory tone, and highlight potential compliance implications and how the business user should proceed in approving loans or can loan lenders offer the customer the choice to pay with a credit automatically, or refuse if the model believes the risk is too high and will likely result in a payment default.based on the details and facts here."""

###############################################################################
## Define tools for your agent, enabling it to retrieve data or take actions
## beyond text generation
## To create and see usage examples of more tools, see
## https://docs.databricks.com/generative-ai/agent-framework/agent-tool.html
###############################################################################
tools = []

# You can use UDFs in Unity Catalog as agent tools
# NOTE(review): the wildcard exposes every function in this schema to the
# agent, and the catalog name is hard-coded; adjust for your workspace.
uc_tool_names = ["pavithra_rao.credit_decisioning.*"]
uc_toolkit = UCFunctionToolkit(function_names=uc_tool_names)
tools.extend(uc_toolkit.tools)


#####################
## Define agent logic
#####################


def create_tool_calling_agent(
    model: LanguageModelLike,
    tools: Union[Sequence[BaseTool], ToolNode],
    system_prompt: Optional[str] = None,
) -> CompiledStateGraph:
    """Build a two-node LangGraph loop: call the model, run any requested tools, repeat.

    Args:
        model: Chat model; the tools are bound to it via ``bind_tools``.
        tools: Tools the model may call; also executed by the graph's tool node.
        system_prompt: Optional text prepended as a system message on every
            model call. When falsy, the state's messages are passed unchanged.

    Returns:
        The compiled graph. Entry point is the ``agent`` node; the graph ends
        when the latest message carries no tool calls.
    """
    model = model.bind_tools(tools)

    def should_continue(state: ChatAgentState):
        """Route to the tool node while the last message requests tool calls."""
        last_message = state["messages"][-1]
        # If there are function calls, continue. else, end
        if last_message.get("tool_calls"):
            return "continue"
        return "end"

    if system_prompt:
        # Prepend the system message at call time instead of storing it in the state.
        preprocessor = RunnableLambda(
            lambda state: [{"role": "system", "content": system_prompt}]
            + state["messages"]
        )
    else:
        preprocessor = RunnableLambda(lambda state: state["messages"])
    model_runnable = preprocessor | model

    def call_model(
        state: ChatAgentState,
        config: RunnableConfig,
    ):
        """Invoke the model on the current state and return its reply as a state update."""
        response = model_runnable.invoke(state, config)

        return {"messages": [response]}

    workflow = StateGraph(ChatAgentState)

    workflow.add_node("agent", RunnableLambda(call_model))
    workflow.add_node("tools", ChatAgentToolNode(tools))

    workflow.set_entry_point("agent")
    workflow.add_conditional_edges(
        "agent",
        should_continue,
        {
            "continue": "tools",
            "end": END,
        },
    )
    # After tools run, control always returns to the model.
    workflow.add_edge("tools", "agent")

    return workflow.compile()


class LangGraphChatAgent(ChatAgent):
    """MLflow ``ChatAgent`` wrapper around a compiled LangGraph agent."""

    def __init__(self, agent: CompiledStateGraph):
        self.agent = agent

    def predict(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> ChatAgentResponse:
        """Run the graph to completion and return every message the nodes emitted.

        ``context`` and ``custom_inputs`` are accepted for interface
        compatibility and are not used.
        """
        request = {"messages": self._convert_messages_to_dict(messages)}

        # Kept separate from the ``messages`` parameter so the input is not shadowed.
        response_messages = []
        for event in self.agent.stream(request, stream_mode="updates"):
            for node_data in event.values():
                response_messages.extend(
                    ChatAgentMessage(**msg) for msg in node_data.get("messages", [])
                )
        return ChatAgentResponse(messages=response_messages)

    def predict_stream(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> Generator[ChatAgentChunk, None, None]:
        """Yield one chunk per message as each graph node finishes.

        ``context`` and ``custom_inputs`` are accepted for interface
        compatibility and are not used.
        """
        request = {"messages": self._convert_messages_to_dict(messages)}
        for event in self.agent.stream(request, stream_mode="updates"):
            for node_data in event.values():
                # Same tolerant lookup as predict(): a node update without a
                # "messages" key yields nothing instead of raising KeyError.
                yield from (
                    ChatAgentChunk(**{"delta": msg})
                    for msg in node_data.get("messages", [])
                )


# Create the agent object, and specify it as the agent object to use when
# loading the agent back for inference via mlflow.models.set_model()
agent = create_tool_calling_agent(llm, tools, system_prompt)
AGENT = LangGraphChatAgent(agent)
mlflow.models.set_model(AGENT)