diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/.gitignore b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/00-data-uc-function-setup.ipynb b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/00-data-uc-function-setup.ipynb new file mode 100644 index 0000000..f2249ad --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/00-data-uc-function-setup.ipynb @@ -0,0 +1,2040 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d81b24d3-4ef3-4990-bb1b-6de5b0858dcb", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Library Update and Restart Operation" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ngoogle-api-core 2.20.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.19.5, but you have protobuf 6.33.2 which is incompatible.\ngrpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 6.33.2 which is incompatible.\nmlflow-skinny 2.21.3 requires protobuf<6,>=3.12.0, but you have protobuf 6.33.2 which is incompatible.\u001B[0m\u001B[31m\n\u001B[0m\u001B[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.\u001B[0m\n" + ] + } + ], + "source": [ + "%pip install -qqqq -U databricks-sdk\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d5104102-9ea2-43bf-8ca7-40c6fe2abad6", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Widgets Declaration" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"cyber_full_path\", \"\", \"cyber_full_path\")\n", + "dbutils.widgets.text(\"user_full_path\", \"\", \"user_full_path\")\n", + "dbutils.widgets.text(\"catalog\", \"\", \"catalog\")\n", + "dbutils.widgets.text(\"schema\", \"\", \"schema\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "841fb816-83f7-4caf-b187-6ecf4b46877b", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Widget Paths Adjustment" + } + }, + "outputs": [], + "source": [ + "def get_full_path(full_path):\n", + " if full_path.startswith(\"/Workspace\"):\n", + " full_path = f\"file:{full_path}\"\n", + " return full_path\n", + "\n", + "\n", + "cyber_full_path = get_full_path(dbutils.widgets.get(\"cyber_full_path\"))\n", + "user_full_path = get_full_path(dbutils.widgets.get(\"user_full_path\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": 
{}, + "nuid": "8f328d85-d29a-4a45-b91b-45bc6da2fd06", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Load Cyber Threat Detection and User Info Data" + } + }, + "outputs": [], + "source": [ + "cyber_threat_detection_df = spark.read.format(\"parquet\").load(cyber_full_path)\n", + "user_info_df = spark.read.format(\"parquet\").load(user_full_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "7b30fb47-5c23-4e64-8ea7-7055425b6251", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Display Cyber Threat Detection Data" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
[HTML table preview omitted: 100 synthetic cyber_threat_detection rows with columns threat_id, event_timestamp, source_ip, destination_ip, source_port, destination_port, protocol, threat_type, detection_tool, description; the same rows appear in the structured output metadata below.]
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + 8, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.165", + 31497, + 46397, + "TCP", + "Ransomware", + "IDS", + "Synthetic threat event #8" + ], + [ + 13, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.109", + 33384, + 47791, + "TCP", + "Malware", + "SIEM", + "Synthetic threat event #13" + ], + [ + 16, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.106", + 23716, + 14761, + "TCP", + "Ransomware", + "Firewall", + "Synthetic threat event #16" + ], + [ + 17, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.151", + 16276, + 52723, + "UDP", + "Ransomware", + "IPS", + "Synthetic threat event #17" + ], + [ + 19, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.52", + 60443, + 48535, + "UDP", + "Malware", + "Firewall", + "Synthetic threat event #19" + ], + [ + 20, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.184", + 4891, + 61847, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #20" + ], + [ + 27, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.137", + 18866, + 65124, + "TCP", + "Phishing", + "Firewall", + "Synthetic threat event #27" + ], + [ + 37, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.144", + 29745, + 22066, + "TCP", + "BruteForce", + "Firewall", + "Synthetic threat event #37" + ], + [ + 40, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.30", + 45188, + 52861, + "UDP", + "Phishing", + "Firewall", + "Synthetic threat event #40" + ], + [ + 41, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.108", + 34629, + 7246, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #41" + ], + [ + 42, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.134", + 38921, + 9420, + "TCP", + "Phishing", + "IDS", + "Synthetic threat event #42" + ], + [ + 57, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.46", + 28867, + 35827, + "UDP", + "Ransomware", + "IDS", + "Synthetic threat event #57" + ], + [ + 63, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.209", + 20627, + 25952, + "TCP", + "DDoS", + "IPS", + "Synthetic threat event #63" + ], + [ + 72, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.53", + 52232, + 20318, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #72" + ], + [ + 86, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.200", + 13601, + 41855, + "UDP", + "Malware", + "SIEM", + "Synthetic threat event #86" + ], + [ + 88, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.92", + 50221, + 47874, + "TCP", + "Phishing", + "IDS", + "Synthetic threat event #88" + ], + [ + 94, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.71", + 45447, + 27246, + "UDP", + "BruteForce", + "SIEM", + "Synthetic threat event #94" + ], + [ + 28, + "2025-09-08T02:35:06.534Z", + "192.168.1.146", + "10.0.0.150", + 17615, + 25068, + "TCP", + "Phishing", + "Firewall", + "Synthetic threat event #28" + ], + [ + 11, + "2025-09-08T02:35:06.534Z", + "192.168.1.104", + "10.0.0.144", + 42139, + 18790, + "TCP", + "Ransomware", + "SIEM", + "Synthetic threat event #11" + ], + [ + 12, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.226", + 45390, + 62222, + "TCP", + "Malware", + "IDS", + "Synthetic threat event #12" + ], + [ + 52, + 
"2025-09-08T02:35:06.534Z", + "192.168.1.228", + "10.0.0.96", + 50684, + 39013, + "TCP", + "Ransomware", + "SIEM", + "Synthetic threat event #52" + ], + [ + 53, + "2025-09-08T02:35:06.534Z", + "192.168.1.114", + "10.0.0.69", + 32294, + 18526, + "UDP", + "Ransomware", + "IDS", + "Synthetic threat event #53" + ], + [ + 36, + "2025-09-08T02:35:06.534Z", + "192.168.1.81", + "10.0.0.80", + 11946, + 13299, + "TCP", + "BruteForce", + "IDS", + "Synthetic threat event #36" + ], + [ + 73, + "2025-09-08T02:35:06.534Z", + "192.168.1.128", + "10.0.0.248", + 2643, + 52216, + "UDP", + "Ransomware", + "Firewall", + "Synthetic threat event #73" + ], + [ + 77, + "2025-09-08T02:35:06.534Z", + "192.168.1.32", + "10.0.0.102", + 860, + 58299, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #77" + ], + [ + 78, + "2025-09-08T02:35:06.534Z", + "192.168.1.132", + "10.0.0.229", + 22411, + 25295, + "TCP", + "Phishing", + "IPS", + "Synthetic threat event #78" + ], + [ + 49, + "2025-09-08T02:35:06.534Z", + "192.168.1.27", + "10.0.0.17", + 29169, + 48481, + "UDP", + "BruteForce", + "SIEM", + "Synthetic threat event #49" + ], + [ + 50, + "2025-09-08T02:35:06.534Z", + "192.168.1.25", + "10.0.0.246", + 9577, + 58301, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #50" + ], + [ + 14, + "2025-09-08T02:35:06.534Z", + "192.168.1.159", + "10.0.0.126", + 14901, + 46758, + "TCP", + "BruteForce", + "SIEM", + "Synthetic threat event #14" + ], + [ + 33, + "2025-09-08T02:35:06.534Z", + "192.168.1.14", + "10.0.0.159", + 60372, + 38800, + "UDP", + "Phishing", + "SIEM", + "Synthetic threat event #33" + ], + [ + 34, + "2025-09-08T02:35:06.534Z", + "192.168.1.65", + "10.0.0.241", + 10889, + 26272, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #34" + ], + [ + 69, + "2025-09-08T02:35:06.534Z", + "192.168.1.19", + "10.0.0.35", + 6380, + 28243, + "UDP", + "Malware", + "IDS", + "Synthetic threat event #69" + ], + [ + 70, + "2025-09-08T02:35:06.534Z", + "192.168.1.219", + "10.0.0.47", + 12352, + 19677, + "UDP", + "Malware", + "Firewall", + "Synthetic threat event #70" + ], + [ + 95, + "2025-09-08T02:35:06.534Z", + "192.168.1.6", + "10.0.0.90", + 30160, + 33916, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #95" + ], + [ + 74, + "2025-09-08T02:35:06.534Z", + "192.168.1.93", + "10.0.0.172", + 1001, + 2150, + "TCP", + "Ransomware", + "SIEM", + "Synthetic threat event #74" + ], + [ + 75, + "2025-09-08T02:35:06.534Z", + "192.168.1.73", + "10.0.0.232", + 13631, + 47820, + "TCP", + "DDoS", + "SIEM", + "Synthetic threat event #75" + ], + [ + 91, + "2025-09-08T02:35:06.534Z", + "192.168.1.178", + "10.0.0.211", + 34487, + 25698, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #91" + ], + [ + 92, + "2025-09-08T02:35:06.534Z", + "192.168.1.175", + "10.0.0.109", + 58240, + 55180, + "TCP", + "Ransomware", + "Firewall", + "Synthetic threat event #92" + ], + [ + 38, + "2025-09-08T02:35:06.534Z", + "192.168.1.155", + "10.0.0.24", + 30980, + 42013, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #38" + ], + [ + 39, + "2025-09-08T02:35:06.534Z", + "192.168.1.20", + "10.0.0.111", + 11465, + 31299, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #39" + ], + [ + 22, + "2025-09-08T02:35:06.534Z", + "192.168.1.21", + "10.0.0.163", + 30925, + 43234, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #22" + ], + [ + 23, + "2025-09-08T02:35:06.534Z", + "192.168.1.178", + "10.0.0.241", + 25022, + 14691, + "UDP", + "Ransomware", + "SIEM", + "Synthetic threat event #23" + ], + [ + 61, + 
"2025-09-08T02:35:06.534Z", + "192.168.1.129", + "10.0.0.173", + 29871, + 55279, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #61" + ], + [ + 62, + "2025-09-08T02:35:06.534Z", + "192.168.1.251", + "10.0.0.212", + 15459, + 38634, + "TCP", + "Malware", + "IDS", + "Synthetic threat event #62" + ], + [ + 47, + "2025-09-08T02:35:06.534Z", + "192.168.1.165", + "10.0.0.248", + 33234, + 51449, + "TCP", + "Ransomware", + "IDS", + "Synthetic threat event #47" + ], + [ + 48, + "2025-09-08T02:35:06.534Z", + "192.168.1.124", + "10.0.0.135", + 3611, + 64576, + "TCP", + "DDoS", + "IPS", + "Synthetic threat event #48" + ], + [ + 58, + "2025-09-08T02:35:06.534Z", + "192.168.1.180", + "10.0.0.27", + 30987, + 40680, + "TCP", + "Malware", + "Firewall", + "Synthetic threat event #58" + ], + [ + 59, + "2025-09-08T02:35:06.534Z", + "192.168.1.124", + "10.0.0.147", + 43753, + 58875, + "UDP", + "Ransomware", + "IDS", + "Synthetic threat event #59" + ], + [ + 2, + "2025-09-08T02:35:06.534Z", + "192.168.1.254", + "10.0.0.182", + 37519, + 42530, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #2" + ], + [ + 3, + "2025-09-08T02:35:06.534Z", + "192.168.1.188", + "10.0.0.243", + 58260, + 26206, + "UDP", + "Ransomware", + "SIEM", + "Synthetic threat event #3" + ], + [ + 9, + "2025-09-08T02:35:06.534Z", + "192.168.1.234", + "10.0.0.196", + 25644, + 40011, + "UDP", + "Malware", + "IDS", + "Synthetic threat event #9" + ], + [ + 83, + "2025-09-08T02:35:06.534Z", + "192.168.1.129", + "10.0.0.15", + 61985, + 53064, + "TCP", + "Ransomware", + "IDS", + "Synthetic threat event #83" + ], + [ + 84, + "2025-09-08T02:35:06.534Z", + "192.168.1.34", + "10.0.0.150", + 24485, + 47791, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #84" + ], + [ + 24, + "2025-09-08T02:35:06.534Z", + "192.168.1.100", + "10.0.0.226", + 6288, + 52252, + "TCP", + "BruteForce", + "IPS", + "Synthetic threat event #24" + ], + [ + 25, + "2025-09-08T02:35:06.534Z", + "192.168.1.254", + "10.0.0.239", + 11520, + 14800, + "TCP", + "DDoS", + "SIEM", + "Synthetic threat event #25" + ], + [ + 66, + "2025-09-08T02:35:06.534Z", + "192.168.1.219", + "10.0.0.187", + 313, + 24960, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #66" + ], + [ + 67, + "2025-09-08T02:35:06.534Z", + "192.168.1.65", + "10.0.0.153", + 28272, + 14213, + "UDP", + "Ransomware", + "IPS", + "Synthetic threat event #67" + ], + [ + 97, + "2025-09-08T02:35:06.534Z", + "192.168.1.166", + "10.0.0.191", + 21613, + 4076, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #97" + ], + [ + 98, + "2025-09-08T02:35:06.534Z", + "192.168.1.132", + "10.0.0.210", + 46377, + 24301, + "UDP", + "Malware", + "SIEM", + "Synthetic threat event #98" + ], + [ + 80, + "2025-09-08T02:35:06.534Z", + "192.168.1.91", + "10.0.0.82", + 36588, + 41762, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #80" + ], + [ + 81, + "2025-09-08T02:35:06.534Z", + "192.168.1.228", + "10.0.0.167", + 38068, + 6209, + "TCP", + "DDoS", + "SIEM", + "Synthetic threat event #81" + ], + [ + 5, + "2025-09-08T02:35:06.534Z", + "192.168.1.137", + "10.0.0.14", + 40293, + 7310, + "UDP", + "Ransomware", + "IPS", + "Synthetic threat event #5" + ], + [ + 6, + "2025-09-08T02:35:06.534Z", + "192.168.1.15", + "10.0.0.25", + 28995, + 49030, + "TCP", + "DDoS", + "Firewall", + "Synthetic threat event #6" + ], + [ + 30, + "2025-09-08T02:35:06.534Z", + "192.168.1.72", + "10.0.0.126", + 43790, + 45511, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #30" + ], + [ + 31, + "2025-09-08T02:35:06.534Z", + 
"192.168.1.17", + "10.0.0.47", + 16448, + 22043, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #31" + ], + [ + 55, + "2025-09-08T02:35:06.534Z", + "192.168.1.157", + "10.0.0.71", + 22809, + 39626, + "TCP", + "Phishing", + "IPS", + "Synthetic threat event #55" + ], + [ + 56, + "2025-09-08T02:35:06.534Z", + "192.168.1.65", + "10.0.0.132", + 62734, + 64579, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #56" + ], + [ + 64, + "2025-09-08T02:35:06.534Z", + "192.168.1.112", + "10.0.0.10", + 16794, + 32746, + "TCP", + "Malware", + "IDS", + "Synthetic threat event #64" + ], + [ + 99, + "2025-09-08T02:35:06.534Z", + "192.168.1.48", + "10.0.0.1", + 44645, + 23540, + "UDP", + "DDoS", + "IDS", + "Synthetic threat event #99" + ], + [ + 100, + "2025-09-08T02:35:06.534Z", + "192.168.1.35", + "10.0.0.140", + 48502, + 64007, + "TCP", + "Phishing", + "SIEM", + "Synthetic threat event #100" + ], + [ + 89, + "2025-09-08T02:35:06.534Z", + "192.168.1.198", + "10.0.0.209", + 7236, + 19763, + "UDP", + "DDoS", + "IDS", + "Synthetic threat event #89" + ], + [ + 87, + "2025-09-08T02:35:06.534Z", + "192.168.1.136", + "10.0.0.237", + 37253, + 42372, + "TCP", + "Phishing", + "IPS", + "Synthetic threat event #87" + ], + [ + 44, + "2025-09-08T02:35:06.534Z", + "192.168.1.64", + "10.0.0.178", + 20686, + 37462, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #44" + ], + [ + 45, + "2025-09-08T02:35:06.534Z", + "192.168.1.152", + "10.0.0.157", + 60717, + 56603, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #45" + ], + [ + 18, + "2025-09-08T02:35:06.534Z", + "192.168.1.169", + "10.0.0.217", + 32820, + 61164, + "TCP", + "BruteForce", + "Firewall", + "Synthetic threat event #18" + ], + [ + 4, + "2025-09-08T02:35:06.534Z", + "192.168.1.239", + "10.0.0.95", + 41092, + 57741, + "TCP", + "Ransomware", + "Firewall", + "Synthetic threat event #4" + ], + [ + 26, + "2025-09-08T02:35:06.534Z", + "192.168.1.104", + "10.0.0.183", + 24462, + 55890, + "UDP", + "BruteForce", + "Firewall", + "Synthetic threat event #26" + ], + [ + 90, + "2025-09-08T02:35:06.534Z", + "192.168.1.152", + "10.0.0.192", + 15089, + 24172, + "TCP", + "Ransomware", + "Firewall", + "Synthetic threat event #90" + ], + [ + 96, + "2025-09-08T02:35:06.534Z", + "192.168.1.14", + "10.0.0.229", + 23857, + 32644, + "UDP", + "BruteForce", + "Firewall", + "Synthetic threat event #96" + ], + [ + 1, + "2025-09-08T02:35:06.534Z", + "192.168.1.94", + "10.0.0.253", + 55334, + 48431, + "TCP", + "BruteForce", + "Firewall", + "Synthetic threat event #1" + ], + [ + 51, + "2025-09-08T02:35:06.534Z", + "192.168.1.197", + "10.0.0.163", + 41237, + 61209, + "TCP", + "Phishing", + "Firewall", + "Synthetic threat event #51" + ], + [ + 71, + "2025-09-08T02:35:06.534Z", + "192.168.1.193", + "10.0.0.241", + 7529, + 28691, + "TCP", + "Ransomware", + "IPS", + "Synthetic threat event #71" + ], + [ + 15, + "2025-09-08T02:35:06.534Z", + "192.168.1.131", + "10.0.0.26", + 201, + 10703, + "UDP", + "BruteForce", + "SIEM", + "Synthetic threat event #15" + ], + [ + 29, + "2025-09-08T02:35:06.534Z", + "192.168.1.23", + "10.0.0.83", + 36350, + 40712, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #29" + ], + [ + 65, + "2025-09-08T02:35:06.534Z", + "192.168.1.103", + "10.0.0.123", + 13211, + 54881, + "UDP", + "BruteForce", + "IPS", + "Synthetic threat event #65" + ], + [ + 82, + "2025-09-08T02:35:06.534Z", + "192.168.1.236", + "10.0.0.152", + 60323, + 49303, + "TCP", + "BruteForce", + "IDS", + "Synthetic threat event #82" + ], + [ + 85, + 
"2025-09-08T02:35:06.534Z", + "192.168.1.156", + "10.0.0.210", + 30996, + 7227, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #85" + ], + [ + 21, + "2025-09-08T02:35:06.534Z", + "192.168.1.136", + "10.0.0.145", + 36608, + 35947, + "TCP", + "DDoS", + "Firewall", + "Synthetic threat event #21" + ], + [ + 43, + "2025-09-08T02:35:06.534Z", + "192.168.1.220", + "10.0.0.125", + 46668, + 30725, + "UDP", + "Ransomware", + "IDS", + "Synthetic threat event #43" + ], + [ + 76, + "2025-09-08T02:35:06.534Z", + "192.168.1.207", + "10.0.0.112", + 57319, + 41247, + "UDP", + "Ransomware", + "IPS", + "Synthetic threat event #76" + ], + [ + 10, + "2025-09-08T02:35:06.534Z", + "192.168.1.245", + "10.0.0.172", + 23178, + 46172, + "UDP", + "BruteForce", + "IDS", + "Synthetic threat event #10" + ], + [ + 93, + "2025-09-08T02:35:06.534Z", + "192.168.1.197", + "10.0.0.189", + 26016, + 53849, + "UDP", + "Phishing", + "IDS", + "Synthetic threat event #93" + ], + [ + 46, + "2025-09-08T02:35:06.534Z", + "192.168.1.47", + "10.0.0.29", + 26717, + 62558, + "TCP", + "BruteForce", + "IDS", + "Synthetic threat event #46" + ], + [ + 54, + "2025-09-08T02:35:06.534Z", + "192.168.1.31", + "10.0.0.42", + 59786, + 53595, + "TCP", + "Malware", + "IDS", + "Synthetic threat event #54" + ], + [ + 7, + "2025-09-08T02:35:06.534Z", + "192.168.1.170", + "10.0.0.222", + 29143, + 64710, + "UDP", + "Phishing", + "IPS", + "Synthetic threat event #7" + ], + [ + 68, + "2025-09-08T02:35:06.534Z", + "192.168.1.96", + "10.0.0.2", + 50950, + 37795, + "TCP", + "Malware", + "IDS", + "Synthetic threat event #68" + ], + [ + 32, + "2025-09-08T02:35:06.534Z", + "192.168.1.145", + "10.0.0.243", + 4574, + 14317, + "UDP", + "DDoS", + "SIEM", + "Synthetic threat event #32" + ], + [ + 60, + "2025-09-08T02:35:06.534Z", + "192.168.1.223", + "10.0.0.164", + 33680, + 1296, + "TCP", + "DDoS", + "IPS", + "Synthetic threat event #60" + ], + [ + 79, + "2025-09-08T02:35:06.534Z", + "192.168.1.187", + "10.0.0.188", + 36484, + 25886, + "TCP", + "DDoS", + "IDS", + "Synthetic threat event #79" + ], + [ + 35, + "2025-09-08T02:35:06.534Z", + "192.168.1.48", + "10.0.0.209", + 59127, + 7478, + "UDP", + "DDoS", + "IPS", + "Synthetic threat event #35" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "threat_id", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "event_timestamp", + "type": "\"timestamp\"" + }, + { + "metadata": "{}", + "name": "source_ip", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "destination_ip", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "source_port", + "type": "\"integer\"" + }, + { + "metadata": "{}", + "name": "destination_port", + "type": "\"integer\"" + }, + { + "metadata": "{}", + "name": "protocol", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "threat_type", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "detection_tool", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "description", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "cyber_threat_detection_df.display()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + 
"application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "84dfff01-34f0-42be-8c66-2c2dab02c996", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Display User Info Data" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "
[HTML table preview omitted: 20 synthetic user_info rows with columns user_id, user_name, department, email, ip_address, location; the same rows appear in the structured output metadata below.]
" + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + 1, + "Alice Johnson", + "Finance", + "alice.johnson@corp.com", + "192.168.1.10", + "Toronto" + ], + [ + 2, + "Bob Smith", + "HR", + "bob.smith@corp.com", + "192.168.1.11", + "New York" + ], + [ + 3, + "Charlie Brown", + "IT", + "charlie.brown@corp.com", + "172.16.0.2", + "Chicago" + ], + [ + 4, + "Diana Prince", + "Security", + "diana.prince@corp.com", + "172.16.0.3", + "London" + ], + [ + 5, + "Ethan Hunt", + "Operations", + "ethan.hunt@corp.com", + "10.1.1.100", + "Berlin" + ], + [ + 6, + "Fiona Davis", + "Marketing", + "fiona.davis@corp.com", + "192.168.1.20", + "Toronto" + ], + [ + 7, + "George Miller", + "Finance", + "george.miller@corp.com", + "192.168.1.21", + "New York" + ], + [ + 8, + "Hannah Lee", + "IT", + "hannah.lee@corp.com", + "172.16.0.10", + "Chicago" + ], + [ + 9, + "Ian Thomas", + "Security", + "ian.thomas@corp.com", + "10.0.0.25", + "London" + ], + [ + 10, + "Julia Roberts", + "HR", + "julia.roberts@corp.com", + "192.168.1.30", + "Berlin" + ], + [ + 11, + "Kevin White", + "IT", + "kevin.white@corp.com", + "172.16.0.20", + "Toronto" + ], + [ + 12, + "Laura Wilson", + "Finance", + "laura.wilson@corp.com", + "10.0.0.50", + "Chicago" + ], + [ + 13, + "Michael Clark", + "Operations", + "michael.clark@corp.com", + "192.168.1.40", + "New York" + ], + [ + 14, + "Nina Patel", + "IT", + "nina.patel@corp.com", + "172.16.0.30", + "London" + ], + [ + 15, + "Oscar Lopez", + "Security", + "oscar.lopez@corp.com", + "10.0.0.75", + "Toronto" + ], + [ + 16, + "Priya Sharma", + "Marketing", + "priya.sharma@corp.com", + "192.168.1.50", + "Berlin" + ], + [ + 17, + "Quentin Blake", + "Finance", + "quentin.blake@corp.com", + "192.168.1.60", + "New York" + ], + [ + 18, + "Rachel Green", + "HR", + "rachel.green@corp.com", + "10.0.0.100", + "Chicago" + ], + [ + 19, + "Sam Wilson", + "IT", + "sam.wilson@corp.com", + "172.16.0.40", + "London" + ], + [ + 20, + "Tina Brown", + "Security", + "tina.brown@corp.com", + "192.168.1.70", + "Toronto" + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "user_id", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "user_name", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "department", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "email", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "ip_address", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "location", + "type": "\"string\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "user_info_df.display()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "461eb894-f71a-41ef-a303-c3b9972126e9", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Catalog and Schema" + } + }, + "outputs": [], + "source": [ + "catalog = 
dbutils.widgets.get(\"catalog\")\n", + "schema = dbutils.widgets.get(\"schema\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "32691ab7-8be1-484e-9f5f-035a53d1c233", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Create and Replace Cyber Threat and User Info Tables" + } + }, + "outputs": [], + "source": [ + "cyber_threat_table_name = f\"{catalog}.{schema}.cyber_threat_detection\"\n", + "user_info_table_name = f\"{catalog}.{schema}.user_info\"\n", + "cyber_threat_detection_df.writeTo(cyber_threat_table_name).createOrReplace()\n", + "user_info_df.writeTo(user_info_table_name).createOrReplace()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "491ab798-36e9-40da-843f-6d7d88b638ab", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Get Cyber Threat Info Function Definition" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "CREATE OR REPLACE FUNCTION bo_cheng_dnb_demos.agents.get_cyber_threat_info(\n", + " threat_type STRING COMMENT 'input cyber threat type'\n", + " )\n", + " RETURNS STRING\n", + " COMMENT 'Returns latest threat_id, event_timestamp, source_ip, protocol, detection_tool given a threat_type'\n", + " RETURN\n", + " SELECT\n", + " CONCAT(\n", + " 'Threat ID: ',\n", + " threat_id,\n", + " ', ',\n", + " 'Timestamp: ',\n", + " event_timestamp,\n", + " ', ',\n", + " 'Source IP: ',\n", + " source_ip,\n", + " ', ',\n", + " 'Protocol: ',\n", + " protocol,\n", + " ', ',\n", + " 'Detection Tool: ',\n", + " detection_tool\n", + " )\n", + " FROM\n", + " bo_cheng_dnb_demos.agents.cyber_threat_detection\n", + " WHERE\n", + " threat_type = threat_type\n", + " ORDER BY\n", + " event_timestamp DESC\n", + " LIMIT 1;" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d75c78b9-7782-41a0-a91a-c305a1815c73", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Query Cyber Threat Info Function" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "-- SELECT\n", + "-- bo_cheng_dnb_demos.agents.get_cyber_threat_info(\"Ransomware\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b07dd6a2-266f-49fd-9198-3e7b79f435dc", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Retrieve User Info by Source IP" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "CREATE OR REPLACE FUNCTION bo_cheng_dnb_demos.agents.get_user_info(\n", + " source_ip STRING COMMENT 'input ip address'\n", + " )\n", + " RETURNS STRING\n", + " COMMENT 'Returns latest user_name, department, email, ip_address, location given a source_ip address'\n", + " RETURN\n", + " SELECT\n", + " CONCAT(\n", + " 'Username: ',\n", + " user_name,\n", + " ', ',\n", + " 'Department: ',\n", + " department,\n", + " ', ',\n", + " 'Email: ',\n", + " email,\n", + " ', ',\n", + " 'IP Address: ',\n", + " ip_address,\n", + " ', ',\n", + " 'Location: ',\n", + " 
location\n", + " )\n", + " FROM\n", + " bo_cheng_dnb_demos.agents.user_info\n", + " WHERE\n", + " ip_address = source_ip\n", + " LIMIT 1;" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "61a89468-f374-4661-8024-db52744ac7e2", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "SQL Get User Info Function Call" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "-- SELECT\n", + "-- bo_cheng_dnb_demos.agents.get_user_info(\"192.168.1.21\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 5448090906010207, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 2 + }, + "notebookName": "00-data-uc-function-setup", + "widgets": { + "catalog": { + "currentValue": "bo_cheng_dnb_demos", + "nuid": "8430696f-22f3-44d7-806d-b395ecb0e787", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "", + "label": "catalog", + "name": "catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": "catalog", + "name": "catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "cyber_full_path": { + "currentValue": "/Volumes/bo_cheng_dnb_demos/agents/memory_agent_volume/cyber_threat_detection/", + "nuid": "140c8ef4-9ffd-4eb2-b327-eefd48e17412", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "", + "label": "cyber_full_path", + "name": "cyber_full_path", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": "cyber_full_path", + "name": "cyber_full_path", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "schema": { + "currentValue": "agents", + "nuid": "d6bdb1be-f6d3-4241-a1ca-5a0c032f57b6", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "", + "label": "schema", + "name": "schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": "schema", + "name": "schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "user_full_path": { + "currentValue": "/Volumes/bo_cheng_dnb_demos/agents/memory_agent_volume/user_info/", + "nuid": "30441a11-250a-4247-9ed9-79de4e7471ad", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "", + "label": "user_full_path", + "name": "user_full_path", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "", + "label": "user_full_path", + "name": "user_full_path", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } 
+ } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/01-lakebase-instance-setup.ipynb b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/01-lakebase-instance-setup.ipynb new file mode 100644 index 0000000..b19cf31 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/01-lakebase-instance-setup.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1247c05c-b447-4270-a9d2-d704eae0af9e", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Install and Restart Databricks SDK Library" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\ngoogle-api-core 2.20.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.19.5, but you have protobuf 6.33.2 which is incompatible.\ngrpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 6.33.2 which is incompatible.\nmlflow-skinny 2.21.3 requires protobuf<6,>=3.12.0, but you have protobuf 6.33.2 which is incompatible.\u001B[0m\u001B[31m\n\u001B[0m\u001B[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.\u001B[0m\n" + ] + } + ], + "source": [ + "%pip install -U -qqqq databricks-sdk\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "80b655ec-94f8-4933-b8a9-507726f11fa9", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Initialize WorkspaceClient in Databricks SDK" + } + }, + "outputs": [], + "source": [ + "from databricks.sdk import WorkspaceClient\n", + "\n", + "w = WorkspaceClient()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "2d404a67-9778-403f-a3e2-180a12080f49", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Create and Wait for Database Instance in Databricks SDK" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DatabaseInstance(name='bo-test-lakebase-3', capacity='CU_1', child_instance_refs=[], creation_time='2026-01-08T19:16:22Z', creator='bo.cheng@databricks.com', custom_tags=[], effective_capacity='CU_1', effective_custom_tags=[], effective_enable_pg_native_login=True, effective_enable_readable_secondaries=False, effective_node_count=1, effective_retention_window_in_days=7, effective_stopped=False, effective_usage_policy_id=None, enable_pg_native_login=None, enable_readable_secondaries=None, node_count=None, parent_instance_ref=None, pg_version='PG_VERSION_16', read_only_dns='instance-ro-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com', 
read_write_dns='instance-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com', retention_window_in_days=None, state=, stopped=None, uid='cd00746e-b544-45c8-9f08-5062a0858c7d', usage_policy_id=None)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from databricks.sdk.service.database import DatabaseInstance\n", + "from datetime import timedelta\n", + "\n", + "database_instance = DatabaseInstance(\n", + " name=\"bo-test-lakebase-3\",\n", + " capacity=\"CU_1\",\n", + " enable_pg_native_login=True,\n", + ")\n", + "w.database.create_database_instance_and_wait(\n", + " database_instance=database_instance, timeout=timedelta(minutes=20)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "5e800fad-2727-4bfb-835c-33f61e84a815", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DatabaseInstanceRole(name='a797584d-d1b5-477b-bf00-947c57234d74', attributes=DatabaseInstanceRoleAttributes(bypassrls=False, createdb=False, createrole=False), effective_attributes=DatabaseInstanceRoleAttributes(bypassrls=False, createdb=False, createrole=False), identity_type=, instance_name='bo-test-lakebase-3', membership_role=)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from databricks.sdk.service.database import (\n", + " DatabaseInstanceRole,\n", + " DatabaseInstanceRoleAttributes,\n", + " DatabaseInstanceRoleIdentityType,\n", + " DatabaseInstanceRoleMembershipRole,\n", + ")\n", + "\n", + "database_instance_role = DatabaseInstanceRole(\n", + " attributes=DatabaseInstanceRoleAttributes(\n", + " bypassrls=False,\n", + " createdb=False,\n", + " createrole=False,\n", + " ),\n", + " identity_type=DatabaseInstanceRoleIdentityType.SERVICE_PRINCIPAL,\n", + " membership_role=DatabaseInstanceRoleMembershipRole.DATABRICKS_SUPERUSER,\n", + " name=\"a797584d-d1b5-477b-bf00-947c57234d74\",\n", + ")\n", + "w.database.create_database_instance_role(\n", + " instance_name=\"bo-test-lakebase-3\", database_instance_role=database_instance_role\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "491ba59f-142c-4067-8103-597660b0a681", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DatabaseCatalog(name='bo-test-lakebase-catalog', database_instance_name='bo-test-lakebase-3', database_name='databricks_postgres', create_database_if_not_exists=True, uid=None)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from databricks.sdk.service.database import DatabaseCatalog\n", + "\n", + "w.database.create_database_catalog(\n", + " catalog=DatabaseCatalog(\n", + " name=\"bo-test-lakebase-catalog\",\n", + " database_instance_name=\"bo-test-lakebase-3\",\n", + " database_name=\"databricks_postgres\",\n", + " create_database_if_not_exists=True,\n", + " )\n", + ")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + 
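Not shown in the notebook: once `create_database_catalog` returns, the Postgres database is registered in Unity Catalog under the catalog name chosen above, so a quick sanity check can be run with plain Spark SQL. This is an optional sketch; it assumes the `bo-test-lakebase-catalog` name created in the previous cell and that the catalog has finished provisioning.

```python
# Optional sanity check (hypothetical cell): the new database catalog should be browsable.
# Backticks are needed because the catalog name contains hyphens.
spark.sql("SHOW SCHEMAS IN `bo-test-lakebase-catalog`").display()
```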
"hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "01-lakebase-instance-setup", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/02-lakebase-langgraph-checkpointer-agent.ipynb b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/02-lakebase-langgraph-checkpointer-agent.ipynb new file mode 100644 index 0000000..3f9a174 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/02-lakebase-langgraph-checkpointer-agent.ipynb @@ -0,0 +1,1470 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "e411b194-dd96-4131-ab8a-5b8ba9914df9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.\u001B[0m\n" + ] + } + ], + "source": [ + "%pip install -U -qqqq databricks-langchain[memory] uv databricks-agents mlflow-skinny[databricks]\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "0550d280-c496-4a2e-9bdc-814a428dee43", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import warnings\n", + "\n", + "warnings.simplefilter(action=\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "43ec4cb9-6706-473c-8cce-ad05defa9f63", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "# Mosaic AI Agent Framework: Author and deploy a Stateful Agent using Databricks Lakebase and LangGraph\n", + "This notebook demonstrates how to build a stateful agent using the Mosaic AI Agent Framework and LangGraph, with Lakebase as the agent’s durable memory and checkpoint store. In this notebook, you will:\n", + "1. Author a Stateful Agent graph with LakeBase (the new Postgres database in Databricks) and Langgraph to manage state using thread ids in a Databricks Agent \n", + "2. Wrap the LangGraph agent with MLflow ChatAgent to ensure compatibility with Databricks features\n", + "3. Test the agent's behavior locally\n", + "4. 
Log the agent, register the model to Unity Catalog, and deploy it for use in apps and Playground\n", + "\n", + "We use [PostgresSaver from LangGraph](https://api.python.langchain.com/en/latest/checkpoint/langchain_postgres.checkpoint.PostgresSaver.html) to open a connection to our Lakebase instance, wrap it in a checkpointer, and pass that checkpointer into the LangGraph agent.\n", + "\n", + "## Why use Lakebase?\n", + "Stateful agents need a place to persist, resume, and inspect their work. Lakebase provides a managed, UC-governed store for agent state:\n", + "- Durable, resumable state. Threads, intermediate checkpoints, tool outputs, and node state are captured automatically after each graph step, so you can resume, branch, or replay any point in time.\n", + "- Queryable & observable. Because state lands in Lakebase and can be registered as a database catalog in Unity Catalog, you can use SQL (or notebooks) to audit conversations and build on other Databricks functionality like dashboards.\n", + "- Governed by Unity Catalog. Apply data permissions, lineage, and auditing to AI state, just like any other table.\n", + "\n", + "## What are Stateful Agents?\n", + "Unlike stateless LLM calls, a stateful agent keeps and reuses context across steps and sessions. Each new conversation is tracked with a thread ID, which represents the logical task or dialogue stream. This way, you can pick up an existing thread and continue the conversation with your agent.\n", + "\n", + "## Prerequisites\n", + "- Create a Lakebase instance; see the Databricks documentation ([AWS](https://docs.databricks.com/aws/en/oltp/create/) | [Azure](https://learn.microsoft.com/en-us/azure/databricks/oltp/create/)).\n", + "- You can create a Lakebase instance by going to SQL Warehouses -> Lakebase Postgres -> Create database instance. You will need the values from the \"Connection details\" section of your Lakebase instance to fill out this notebook."
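A minimal sketch of the thread-ID pattern described above, separate from the full agent built later in this notebook. It assumes the `bo-test-lakebase-3` instance from the previous notebook and the `databricks-gpt-5-2` endpoint used below; the one-node graph is only a stand-in for the real agent graph.

```python
# Minimal sketch: thread-scoped memory with a Lakebase-backed LangGraph checkpointer.
from typing import Annotated, TypedDict

from databricks_langchain import ChatDatabricks, CheckpointSaver
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages


class State(TypedDict):
    messages: Annotated[list, add_messages]


llm = ChatDatabricks(endpoint="databricks-gpt-5-2")  # placeholder endpoint name


def call_llm(state: State) -> dict:
    # Append the model's reply to the running message history.
    return {"messages": [llm.invoke(state["messages"])]}


with CheckpointSaver(instance_name="bo-test-lakebase-3") as checkpointer:
    builder = StateGraph(State)
    builder.add_node("llm", call_llm)
    builder.set_entry_point("llm")
    builder.add_edge("llm", END)
    graph = builder.compile(checkpointer=checkpointer)

    # The thread_id keys every checkpoint written to Lakebase.
    config = {"configurable": {"thread_id": "demo-thread-1"}}
    graph.invoke({"messages": [("user", "Summarize the latest ransomware event.")]}, config)
    # Invoking again with the same thread_id resumes from the stored checkpoint,
    # so the second turn sees the first exchange as context.
    graph.invoke({"messages": [("user", "Which user owns that source IP?")]}, config)
```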
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "eb3511ac-953a-4707-bea3-be4376f4ed42", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(name=\"catalog\", defaultValue=\"bo_cheng_dnb_demos\", label=\"catalog\")\n", + "dbutils.widgets.text(name=\"schema\", defaultValue=\"agents\", label=\"schema\")\n", + "dbutils.widgets.text(name=\"model\", defaultValue=\"memory_agent\", label=\"model\")\n", + "dbutils.widgets.text(\n", + " name=\"DATABRICKS_CLIENT_ID\", defaultValue=\"\", label=\"DATABRICKS_CLIENT_ID\"\n", + ")\n", + "dbutils.widgets.text(\n", + " name=\"DATABRICKS_CLIENT_SECRET\", defaultValue=\"\", label=\"DATABRICKS_CLIENT_SECRET\"\n", + ")\n", + "dbutils.widgets.text(name=\"secret_scope\", defaultValue=\"dbdemos\", label=\"secret_scope\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "4c7a72f0-bc6e-489d-8ec3-f4d7ba4238aa", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "three_tiered_model_name='bo_cheng_dnb_demos.agents.memory_agent'\n" + ] + } + ], + "source": [ + "catalog = dbutils.widgets.get(\"catalog\")\n", + "schema = dbutils.widgets.get(\"schema\")\n", + "model = dbutils.widgets.get(\"model\")\n", + "# LLM_ENDPOINT = dbutils.widgets.get(\"foundation_model\")\n", + "assert (\n", + " len(catalog) > 0 and len(schema) > 0 and len(model) > 0\n", + "), \"Please provide a valid catalog, schema, and model name\"\n", + "three_tiered_model_name = f\"{catalog}.{schema}.{model}\"\n", + "print(f\"{three_tiered_model_name=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "a036ef50-66ac-45ec-b678-64d4c3fb3a5d", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Using existing secret scope: dbdemos\n" + ] + } + ], + "source": [ + "from databricks.sdk import WorkspaceClient\n", + "\n", + "w = WorkspaceClient()\n", + "\n", + "DATABRICKS_HOST = w.config.host\n", + "\n", + "secret_scope_name = dbutils.widgets.get(\"secret_scope\")\n", + "\n", + "# if needed create a secret scope\n", + "if secret_scope_name != \"dbdemos\":\n", + " w.secrets.create_scope(scope=secret_scope_name)\n", + "else:\n", + " print(f\"Using existing secret scope: {secret_scope_name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "632ff6d6-902c-478e-8d6f-2c8e743b6d31", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "no DATABRICKS_CLIENT_ID is provided\nno DATABRICKS_CLIENT_ID is provided\n" + ] + } + ], + "source": [ + "if 
dbutils.widgets.get(\"DATABRICKS_CLIENT_ID\") == \"\":\n", + " print(\"no DATABRICKS_CLIENT_ID is provided\")\n", + "else:\n", + " w.secrets.put_secret(\n", + " scope=secret_scope_name,\n", + " key=\"DATABRICKS_CLIENT_ID\",\n", + " string_value=dbutils.widgets.get(\"DATABRICKS_CLIENT_ID\"),\n", + " )\n", + "if dbutils.widgets.get(\"DATABRICKS_CLIENT_SECRET\") == \"\":\n", + " print(\"no DATABRICKS_CLIENT_ID is provided\")\n", + "else:\n", + " w.secrets.put_secret(\n", + " scope=secret_scope_name,\n", + " key=\"DATABRICKS_CLIENT_SECRET\",\n", + " string_value=dbutils.widgets.get(\"DATABRICKS_CLIENT_SECRET\"),\n", + " )\n", + "w.secrets.put_secret(\n", + " scope=secret_scope_name, key=\"DATABRICKS_HOST\", string_value=DATABRICKS_HOST\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "ea052166-1d1a-44c3-9408-07a16b1bd28e", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# import os\n", + "\n", + "# os.environ[\"DATABRICKS_CLIENT_ID\"] = dbutils.secrets.get(\n", + "# scope=secret_scope_name, key=\"DATABRICKS_CLIENT_ID\"\n", + "# )\n", + "# os.environ[\"DATABRICKS_CLIENT_SECRET\"] = dbutils.secrets.get(\n", + "# scope=secret_scope_name, key=\"DATABRICKS_CLIENT_SECRET\"\n", + "# )\n", + "\n", + "# os.unsetenv(\"DATABRICKS_CLIENT_ID\")\n", + "# os.unsetenv(\"DATABRICKS_CLIENT_SECRET\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "12bb5e31-3252-4dc6-92e9-7013f1e83d43", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Lakebase Config\n", + "- Enable Postgres native role login\n", + "- Might need to wait a few min for pg roles to apply\n", + "- Create new catalog with PostgreSQL Database: `databricks_postgres` schema off lakebase instance for querying purposes" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "2caaac13-e6d5-4475-8ea9-b8cb3abd21ad", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Checkpoint tables are ready.\n" + ] + } + ], + "source": [ + "# First-time checkpoint table setup\n", + "from databricks.sdk import WorkspaceClient\n", + "from databricks_langchain import CheckpointSaver\n", + "\n", + "# --- TODO: Fill in Lakebase instance name ---\n", + "INSTANCE_NAME = \"bo-test-lakebase-3\"\n", + "\n", + "# Create tables if missing\n", + "with CheckpointSaver(instance_name=INSTANCE_NAME) as saver:\n", + " saver.setup() # sets up checkpoint tables\n", + " print(\"✅ Checkpoint tables are ready.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1ec24de0-62ab-4ac7-a7de-17b15cff0e64", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting 
agent.py\n" + ] + } + ], + "source": [ + "%%writefile agent.py\n", + "import logging\n", + "import os\n", + "import uuid\n", + "from typing import Annotated, Any, Generator, Optional, Sequence, TypedDict\n", + "\n", + "import mlflow\n", + "from databricks_langchain import (\n", + " ChatDatabricks,\n", + " UCFunctionToolkit,\n", + " CheckpointSaver,\n", + ")\n", + "from databricks.sdk import WorkspaceClient\n", + "from langchain_core.messages import AIMessage, AIMessageChunk, AnyMessage\n", + "from langchain_core.runnables import RunnableConfig, RunnableLambda\n", + "from langgraph.graph import END, StateGraph\n", + "from langgraph.graph.message import add_messages\n", + "from langgraph.prebuilt.tool_node import ToolNode\n", + "from mlflow.pyfunc import ResponsesAgent\n", + "from mlflow.types.responses import (\n", + " ResponsesAgentRequest,\n", + " ResponsesAgentResponse,\n", + " ResponsesAgentStreamEvent,\n", + " output_to_responses_items_stream,\n", + ")\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "logging.basicConfig(level=os.getenv(\"LOG_LEVEL\", \"INFO\"))\n", + "\n", + "############################################\n", + "# Define your LLM endpoint and system prompt\n", + "############################################\n", + "# TODO: Replace with your model serving endpoint\n", + "LLM_ENDPOINT_NAME = \"databricks-gpt-5-2\"\n", + "\n", + "# TODO: Update with your system prompt\n", + "SYSTEM_PROMPT = \"\"\"\n", + " You are an cybersecurity assistant.\n", + " You are given a task and you must complete it.\n", + " Use the following routine to support the customer.\n", + " # Routine:\n", + " 1. Provide the get_cyber_threat_info tool the type of threat being asked about.\n", + " 2. Use the source ip address provided in step 1 as input for the get_user_info tool to retrieve user specific info.\n", + " Use the following tools to complete the task:\n", + " {tools}\"\"\"\n", + "\n", + "############################################\n", + "# Lakebase configuration\n", + "############################################\n", + "# TODO: Fill in Lakebase instance name\n", + "LAKEBASE_INSTANCE_NAME = \"bo-test-lakebase-3\"\n", + "\n", + "###############################################################################\n", + "## Define tools for your agent,enabling it to retrieve data or take actions\n", + "## beyond text generation\n", + "## To create and see usage examples of more tools, see\n", + "## https://docs.databricks.com/en/generative-ai/agent-framework/agent-tool.html\n", + "###############################################################################\n", + "tools = []\n", + "\n", + "# Example UC tools; add your own as needed\n", + "UC_TOOL_NAMES: list[str] = [\n", + " \"bo_cheng_dnb_demos.agents.get_cyber_threat_info\",\n", + " \"bo_cheng_dnb_demos.agents.get_user_info\",\n", + "]\n", + "if UC_TOOL_NAMES:\n", + " uc_toolkit = UCFunctionToolkit(function_names=UC_TOOL_NAMES)\n", + " tools.extend(uc_toolkit.tools)\n", + "\n", + "# Use Databricks vector search indexes as tools\n", + "# See https://docs.databricks.com/en/generative-ai/agent-framework/unstructured-retrieval-tools.html#locally-develop-vector-search-retriever-tools-with-ai-bridge\n", + "# List to store vector search tool instances for unstructured retrieval.\n", + "VECTOR_SEARCH_TOOLS = []\n", + "\n", + "# To add vector search retriever tools,\n", + "# use VectorSearchRetrieverTool and create_tool_info,\n", + "# then append the result to TOOL_INFOS.\n", + "# Example:\n", + "# VECTOR_SEARCH_TOOLS.append(\n", + "# 
VectorSearchRetrieverTool(\n", + "# index_name=\"\",\n", + "# # filters=\"...\"\n", + "# )\n", + "# )\n", + "\n", + "tools.extend(VECTOR_SEARCH_TOOLS)\n", + "\n", + "#####################\n", + "## Define agent logic\n", + "#####################\n", + "\n", + "\n", + "class AgentState(TypedDict):\n", + " messages: Annotated[Sequence[AnyMessage], add_messages]\n", + " custom_inputs: Optional[dict[str, Any]]\n", + " custom_outputs: Optional[dict[str, Any]]\n", + "\n", + "\n", + "class LangGraphResponsesAgent(ResponsesAgent):\n", + " \"\"\"Stateful agent using ResponsesAgent with pooled Lakebase checkpointing.\"\"\"\n", + "\n", + " def __init__(self, lakebase_config: dict[str, Any]):\n", + " self.workspace_client = WorkspaceClient()\n", + "\n", + " self.model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)\n", + " self.system_prompt = SYSTEM_PROMPT\n", + " self.model_with_tools = self.model.bind_tools(tools) if tools else self.model\n", + "\n", + " def _create_graph(self, checkpointer: Any):\n", + " def should_continue(state: AgentState):\n", + " messages = state[\"messages\"]\n", + " last_message = messages[-1]\n", + " if isinstance(last_message, AIMessage) and last_message.tool_calls:\n", + " return \"continue\"\n", + " return \"end\"\n", + "\n", + " preprocessor = (\n", + " RunnableLambda(\n", + " lambda state: [{\"role\": \"system\", \"content\": self.system_prompt}]\n", + " + state[\"messages\"]\n", + " )\n", + " if self.system_prompt\n", + " else RunnableLambda(lambda state: state[\"messages\"])\n", + " )\n", + " model_runnable = preprocessor | self.model_with_tools\n", + "\n", + " def call_model(state: AgentState, config: RunnableConfig):\n", + " response = model_runnable.invoke(state, config)\n", + " return {\"messages\": [response]}\n", + "\n", + " workflow = StateGraph(AgentState)\n", + " workflow.add_node(\"agent\", RunnableLambda(call_model))\n", + "\n", + " if tools:\n", + " workflow.add_node(\"tools\", ToolNode(tools))\n", + " workflow.add_conditional_edges(\n", + " \"agent\", should_continue, {\"continue\": \"tools\", \"end\": END}\n", + " )\n", + " workflow.add_edge(\"tools\", \"agent\")\n", + " else:\n", + " workflow.add_edge(\"agent\", END)\n", + "\n", + " workflow.set_entry_point(\"agent\")\n", + " return workflow.compile(checkpointer=checkpointer)\n", + "\n", + " def _get_or_create_thread_id(self, request: ResponsesAgentRequest) -> str:\n", + " \"\"\"Get thread_id from request or create a new one.\n", + "\n", + " Priority:\n", + " 1. Use thread_id from custom_inputs if present\n", + " 2. Use conversation_id from chat context if available\n", + " 3. 
Generate a new UUID\n", + "\n", + " Returns:\n", + " thread_id: The thread identifier to use for this conversation\n", + " \"\"\"\n", + " ci = dict(request.custom_inputs or {})\n", + "\n", + " if \"thread_id\" in ci:\n", + " return ci[\"thread_id\"]\n", + "\n", + " # using conversation id from chat context as thread id\n", + " # https://mlflow.org/docs/latest/api_reference/python_api/mlflow.types.html#mlflow.types.agent.ChatContext\n", + " if request.context and getattr(request.context, \"conversation_id\", None):\n", + " return request.context.conversation_id\n", + "\n", + " # Generate new thread_id\n", + " return str(uuid.uuid4())\n", + "\n", + " def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:\n", + " outputs = [\n", + " event.item\n", + " for event in self.predict_stream(request)\n", + " if event.type == \"response.output_item.done\"\n", + " ]\n", + " return ResponsesAgentResponse(\n", + " output=outputs, custom_outputs=request.custom_inputs\n", + " )\n", + "\n", + " def predict_stream(\n", + " self, request: ResponsesAgentRequest\n", + " ) -> Generator[ResponsesAgentStreamEvent, None, None]:\n", + " thread_id = self._get_or_create_thread_id(request)\n", + " ci = dict(request.custom_inputs or {})\n", + " ci[\"thread_id\"] = thread_id\n", + " request.custom_inputs = ci\n", + "\n", + " # Convert incoming Responses messages to ChatCompletions format\n", + " # LangChain will automatically convert from ChatCompletions to LangChain format\n", + " cc_msgs = self.prep_msgs_for_cc_llm([i.model_dump() for i in request.input])\n", + " langchain_msgs = cc_msgs\n", + " checkpoint_config = {\"configurable\": {\"thread_id\": thread_id}}\n", + "\n", + " with CheckpointSaver(instance_name=LAKEBASE_INSTANCE_NAME) as checkpointer:\n", + " graph = self._create_graph(checkpointer)\n", + "\n", + " for event in graph.stream(\n", + " {\"messages\": langchain_msgs},\n", + " checkpoint_config,\n", + " stream_mode=[\"updates\", \"messages\"],\n", + " ):\n", + " if event[0] == \"updates\":\n", + " for node_data in event[1].values():\n", + " if len(node_data.get(\"messages\", [])) > 0:\n", + " yield from output_to_responses_items_stream(\n", + " node_data[\"messages\"]\n", + " )\n", + " elif event[0] == \"messages\":\n", + " try:\n", + " chunk = event[1][0]\n", + " if isinstance(chunk, AIMessageChunk) and chunk.content:\n", + " yield ResponsesAgentStreamEvent(\n", + " **self.create_text_delta(\n", + " delta=chunk.content, item_id=chunk.id\n", + " ),\n", + " )\n", + " except Exception as exc:\n", + " logger.error(\"Error streaming chunk: %s\", exc)\n", + "\n", + "\n", + "# ----- Export model -----\n", + "mlflow.langchain.autolog()\n", + "AGENT = LangGraphResponsesAgent(LAKEBASE_INSTANCE_NAME)\n", + "mlflow.models.set_model(AGENT)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "36beb42b-d89b-4bde-adde-2509bcc03028", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:unitycatalog.ai.core.base:Setting global UC Function client to DatabricksFunctionClient with default configuration.\nINFO:databricks_ai_bridge.lakebase:lakebase pool ready: host=instance-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com db=databricks_postgres min=1 max=10 cache=3000s\nINFO:httpx:HTTP 
Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 19:30:07,745 5046 INFO execute_command Execute command for command \n2026-01-08 19:30:07,745 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 19:30:07,763 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 19:30:07,763 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 19:30:07,767 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 19:30:07,767 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 19:30:08,148 5046 INFO to_table Executing plan \n2026-01-08 19:30:08,148 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 19:30:08,153 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 19:30:08,153 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 19:30:08,156 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 19:30:08,156 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 19:30:11,102 5046 INFO execute_command Execute command for command \n2026-01-08 19:30:11,102 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 19:30:11,109 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 19:30:11,109 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 19:30:11,111 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 19:30:11,111 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 19:30:11,259 5046 INFO to_table Executing plan \n2026-01-08 19:30:11,259 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 19:30:11,262 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 19:30:11,262 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 19:30:11,265 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 19:30:11,265 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "{'object': 'response', 
'output': [{'type': 'function_call', 'id': 'lc_run--019b9f16-7000-78e3-bd78-94983030ded9', 'call_id': 'call_leoNewTFtEi4QQ8oBaDiPmV4', 'name': 'bo_cheng_dnb_demos__agents__get_cyber_threat_info', 'arguments': '{\"threat_type\": \"malware\"}'}, {'type': 'function_call_output', 'call_id': 'call_leoNewTFtEi4QQ8oBaDiPmV4', 'output': '{\"format\": \"SCALAR\", \"value\": \"Threat ID: 8, Timestamp: 2025-09-08 02:35:06.534731, Source IP: 192.168.1.21, Protocol: TCP, Detection Tool: IDS\"}'}, {'type': 'function_call', 'id': 'lc_run--019b9f16-7e87-7fb0-9c8b-4ab1df32bef2', 'call_id': 'call_9LUc1bpclZXHLhAYxxSe2K1F', 'name': 'bo_cheng_dnb_demos__agents__get_user_info', 'arguments': '{\"source_ip\": \"192.168.1.21\"}'}, {'type': 'function_call_output', 'call_id': 'call_9LUc1bpclZXHLhAYxxSe2K1F', 'output': '{\"format\": \"SCALAR\", \"value\": \"Username: George Miller, Department: Finance, Email: george.miller@corp.com, IP Address: 192.168.1.21, Location: New York\"}'}, {'type': 'message', 'id': 'lc_run--019b9f16-84a2-7940-82ac-a196699bfb48', 'content': [{'text': 'The latest **malware** threat (Threat ID **8**, detected **2025-09-08 02:35:06.534731** via **IDS**) originated from source IP **192.168.1.21**, which maps to:\\n\\n- **User:** George Miller \\n- **Department:** Finance \\n- **Email:** george.miller@corp.com \\n- **Location:** New York', 'type': 'output_text'}], 'role': 'assistant'}], 'custom_outputs': {'thread_id': '522eba5f-4b99-48d6-941b-015f6b9727ab'}}\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/databricks.mlflow.trace": "{\"trace_id\": \"tr-173a315b8e4eb871ce4d27585d76b1ee\", \"sql_warehouse_id\": null}", + "text/plain": [ + "Trace(trace_id=tr-173a315b8e4eb871ce4d27585d76b1ee)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from agent import AGENT\n", + "\n", + "result = AGENT.predict(\n", + " {\n", + " \"input\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Who committed the latest malware threat?\",\n", + " }\n", + " ],\n", + " }\n", + ")\n", + "print(result.model_dump(exclude_none=True))\n", + "thread_id = result.custom_outputs[\"thread_id\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d011adc9-c127-4a57-b805-d3172c65d995", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:databricks_ai_bridge.lakebase:lakebase pool ready: host=instance-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com db=databricks_postgres min=1 max=10 cache=3000s\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Response 2: {'object': 'response', 'output': [{'type': 'message', 'id': 'lc_run--019b9f69-a8c5-7bc3-ab19-f08795eddbd5', 'content': [{'text': '**George Miller** was just mentioned.', 'type': 'output_text'}], 'role': 'assistant'}], 'custom_outputs': {'thread_id': '522eba5f-4b99-48d6-941b-015f6b9727ab'}}\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/databricks.mlflow.trace": "{\"trace_id\": \"tr-70343c2f84828c48f898418c1d65de3e\", \"sql_warehouse_id\": null}", + "text/plain": [ + 
"Trace(trace_id=tr-70343c2f84828c48f898418c1d65de3e)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Message 2, include thread ID and notice how agent remembers context from previous predict message\n", + "response2 = AGENT.predict(\n", + " {\n", + " \"input\": [{\"role\": \"user\", \"content\": \"Who was just mentioned?\"}],\n", + " \"custom_inputs\": {\"thread_id\": thread_id},\n", + " }\n", + ")\n", + "print(\"Response 2:\", response2.model_dump(exclude_none=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "37fef865-8ca5-4d0f-83a6-657b3edccf65", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "output_type": "stream", + "text": [ + "\uD83D\uDD17 View Logged Model at: https://e2-demo-field-eng.cloud.databricks.com/ml/experiments/f930eaa2963d4e668ba4e4cb275dd25e/models/m-e503f6a894044bf1bab4f00f7e8fb5aa?o=1444828305810485\n2026/01/08 20:29:41 INFO mlflow.pyfunc: Predicting on input example to validate output\n2026/01/08 20:29:41 WARNING mlflow.tracing.fluent: Failed to start span predict_stream: 'NonRecordingSpan' object has no attribute 'context'. For full traceback, set logging level to debug.\nINFO:databricks_ai_bridge.lakebase:lakebase pool ready: host=instance-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com db=databricks_postgres min=1 max=10 cache=3000s\n2026/01/08 20:29:42 WARNING mlflow.tracing.fluent: Failed to start span LangGraph: 'NonRecordingSpan' object has no attribute 'context'. For full traceback, set logging level to debug.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 20:29:43,991 5046 INFO execute_command Execute command for command \n2026-01-08 20:29:43,991 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 20:29:43,999 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:43,999 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:44,001 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:44,001 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 20:29:44,052 5046 ERROR _handle_rpc_error GRPC Error received\nTraceback (most recent call last):\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py\", line 1726, in _execute_and_fetch_as_iterator\n for b in generator:\n File \"\", line 330, in __next__\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 139, in send\n if not self._has_next():\n ^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 200, in _has_next\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 172, in _has_next\n self._current = self._call_iter(\n ^^^^^^^^^^^^^^^^\n File 
\"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 297, in _call_iter\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 277, in _call_iter\n return iter_fun()\n ^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 173, in \n lambda: next(self._iterator) # type: ignore[arg-type]\n ^^^^^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 543, in __next__\n return self._next()\n ^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 969, in _next\n raise self\ngrpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:\n\tstatus = StatusCode.FAILED_PRECONDITION\n\tdetails = \"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. (requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\"\n\tdebug_error_string = \"UNKNOWN:Error received from peer {grpc_message:\"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. (requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\", grpc_status:9, created_time:\"2026-01-08T20:29:44.051558111+00:00\"}\"\n>\n2026-01-08 20:29:44,052 5046 ERROR _handle_rpc_error GRPC Error received\nTraceback (most recent call last):\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py\", line 1726, in _execute_and_fetch_as_iterator\n for b in generator:\n File \"\", line 330, in __next__\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 139, in send\n if not self._has_next():\n ^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 200, in _has_next\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 172, in _has_next\n self._current = self._call_iter(\n ^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 297, in _call_iter\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 277, in _call_iter\n return iter_fun()\n ^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 173, in \n lambda: next(self._iterator) # type: ignore[arg-type]\n ^^^^^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 543, in __next__\n return self._next()\n ^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 969, in _next\n raise self\ngrpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:\n\tstatus = StatusCode.FAILED_PRECONDITION\n\tdetails = \"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. 
(requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\"\n\tdebug_error_string = \"UNKNOWN:Error received from peer {grpc_message:\"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. (requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\", grpc_status:9, created_time:\"2026-01-08T20:29:44.051558111+00:00\"}\"\n>\nERROR:pyspark.sql.connect.client.logging:GRPC Error received\nTraceback (most recent call last):\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py\", line 1726, in _execute_and_fetch_as_iterator\n for b in generator:\n File \"\", line 330, in __next__\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 139, in send\n if not self._has_next():\n ^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 200, in _has_next\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 172, in _has_next\n self._current = self._call_iter(\n ^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 297, in _call_iter\n raise e\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 277, in _call_iter\n return iter_fun()\n ^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py\", line 173, in \n lambda: next(self._iterator) # type: ignore[arg-type]\n ^^^^^^^^^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 543, in __next__\n return self._next()\n ^^^^^^^^^^^^\n File \"/databricks/python/lib/python3.11/site-packages/grpc/_channel.py\", line 969, in _next\n raise self\ngrpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:\n\tstatus = StatusCode.FAILED_PRECONDITION\n\tdetails = \"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. (requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\"\n\tdebug_error_string = \"UNKNOWN:Error received from peer {grpc_message:\"BAD_REQUEST: session_id is no longer usable. Generate a new session_id by detaching and reattaching the compute and then try again [sessionId=54689871-917b-4fe1-a9dc-af496126a68a, reason=INACTIVITY_TIMEOUT]. (requestId=be46e4b9-fa08-4a17-837a-96f9163980b8)\", grpc_status:9, created_time:\"2026-01-08T20:29:44.051558111+00:00\"}\"\n>\nWARNING:unitycatalog.ai.core.utils.retry_utils:Session expired. Retrying attempt 1 of 5. 
Refreshing session and retrying after 1 seconds...\nINFO:unitycatalog.ai.core.databricks:Refreshing Databricks client and Spark session due to session expiration.\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 20:29:45,142 5046 INFO execute_command Execute command for command \n2026-01-08 20:29:45,142 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 20:29:45,145 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:45,145 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:45,146 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:45,146 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 20:29:45,522 5046 INFO to_table Executing plan \n2026-01-08 20:29:45,522 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 20:29:45,525 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:45,525 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:45,527 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:45,527 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:py4j.clientserver:Closing down clientserver connection\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 20:29:47,184 5046 INFO execute_command Execute command for command \n2026-01-08 20:29:47,184 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 20:29:47,187 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:47,187 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:47,188 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:47,188 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 20:29:47,327 5046 INFO to_table Executing plan \n2026-01-08 20:29:47,327 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 20:29:47,330 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:47,330 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:47,332 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:47,332 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\n2026/01/08 20:29:54 
WARNING mlflow.tracing.fluent: Failed to start span predict_stream: 'NonRecordingSpan' object has no attribute 'context'. For full traceback, set logging level to debug.\nINFO:databricks_ai_bridge.lakebase:lakebase pool ready: host=instance-cd00746e-b544-45c8-9f08-5062a0858c7d.database.cloud.databricks.com db=databricks_postgres min=1 max=10 cache=3000s\n2026/01/08 20:29:55 WARNING mlflow.tracing.fluent: Failed to start span LangGraph: 'NonRecordingSpan' object has no attribute 'context'. For full traceback, set logging level to debug.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 20:29:56,550 5046 INFO execute_command Execute command for command \n2026-01-08 20:29:56,550 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 20:29:56,561 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:56,561 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:56,562 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:56,562 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 20:29:56,696 5046 INFO to_table Executing plan \n2026-01-08 20:29:56,696 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 20:29:56,701 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:56,701 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:56,702 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:56,702 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\nINFO:unitycatalog.ai.core.databricks:Using databricks connect to execute functions with serverless compute.\n2026-01-08 20:29:57,699 5046 INFO execute_command Execute command for command \n2026-01-08 20:29:57,699 5046 INFO execute_command Execute command for command \nINFO:pyspark.sql.connect.client.logging:Execute command for command \n2026-01-08 20:29:57,702 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:57,702 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:57,704 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\n2026-01-08 20:29:57,704 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\n2026-01-08 20:29:57,835 5046 INFO to_table Executing plan \n2026-01-08 20:29:57,835 5046 INFO to_table Executing plan \nINFO:pyspark.sql.connect.client.logging:Executing plan \n2026-01-08 20:29:57,839 5046 INFO _execute_and_fetch ExecuteAndFetch\n2026-01-08 20:29:57,839 5046 INFO _execute_and_fetch ExecuteAndFetch\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetch\n2026-01-08 20:29:57,840 5046 INFO _execute_and_fetch_as_iterator 
ExecuteAndFetchAsIterator\n2026-01-08 20:29:57,840 5046 INFO _execute_and_fetch_as_iterator ExecuteAndFetchAsIterator\nINFO:pyspark.sql.connect.client.logging:ExecuteAndFetchAsIterator\nINFO:unitycatalog.ai.core.utils.retry_utils:Successfully re-acquired connection to a serverless instance.\nINFO:httpx:HTTP Request: POST https://e2-demo-field-eng.cloud.databricks.com/serving-endpoints/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "# Determine Databricks resources to specify for automatic auth passthrough at deployment time\n", + "import mlflow\n", + "from databricks_langchain import VectorSearchRetrieverTool\n", + "from mlflow.models.resources import (\n", + " DatabricksFunction,\n", + " DatabricksServingEndpoint,\n", + " DatabricksLakebase,\n", + " DatabricksVectorSearchIndex,\n", + ") # we are adding DatabricksLakebase resource type\n", + "from mlflow.models.auth_policy import AuthPolicy, SystemAuthPolicy, UserAuthPolicy\n", + "from unitycatalog.ai.langchain.toolkit import UnityCatalogTool\n", + "from agent import LLM_ENDPOINT_NAME, LAKEBASE_INSTANCE_NAME, tools\n", + "from pkg_resources import get_distribution\n", + "\n", + "# TODO: Manually include additional underlying resources if needed and update values for endpoint/lakebase\n", + "resources = [\n", + " DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),\n", + " DatabricksLakebase(database_instance_name=LAKEBASE_INSTANCE_NAME),\n", + "]\n", + "for tool in tools:\n", + " if isinstance(tool, VectorSearchRetrieverTool):\n", + " resources.extend(tool.resources)\n", + " elif isinstance(tool, UnityCatalogTool):\n", + " resources.append(DatabricksFunction(function_name=tool.uc_function_name))\n", + "\n", + "# System policy: resources accessed with system credentials\n", + "system_policy = SystemAuthPolicy(resources=resources)\n", + "\n", + "# User policy: API scopes for OBO access\n", + "api_scopes = [\n", + " \"sql.statement-execution\",\n", + " \"mcp.genie\",\n", + " \"mcp.external\",\n", + " \"catalog.connections\",\n", + " \"mcp.vectorsearch\",\n", + " \"vectorsearch.vector-search-indexes\",\n", + " \"iam.current-user:read\",\n", + " \"sql.warehouses\",\n", + " \"dashboards.genie\",\n", + " \"serving.serving-endpoints\",\n", + " \"iam.access-control:read\",\n", + " \"apps.apps\",\n", + " \"mcp.functions\",\n", + " \"vectorsearch.vector-search-endpoints\",\n", + "]\n", + "user_policy = UserAuthPolicy(api_scopes=api_scopes)\n", + "\n", + "input_example = {\n", + " \"input\": [{\"role\": \"user\", \"content\": \"What is an LLM agent?\"}],\n", + " \"custom_inputs\": {\"thread_id\": \"example-thread-123\"},\n", + "}\n", + "\n", + "with mlflow.start_run():\n", + " logged_agent_info = mlflow.pyfunc.log_model(\n", + " name=\"agent\",\n", + " python_model=\"agent.py\",\n", + " input_example=input_example,\n", + " pip_requirements=[\n", + " f\"databricks-langchain[memory]=={get_distribution('databricks-langchain[memory]').version}\",\n", + " ],\n", + " resources=resources,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "a986456a-f881-46e3-99fe-99b0ef7758ef", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "228308fcfee745a48056fcf07cc45345", + "version_major": 2, + "version_minor": 0 
+ }, + "text/plain": [ + "Downloading artifacts: 0%| | 0/1 [00:00), budget_policy_id=None, compute_status=ComputeStatus(message='App compute is running.', state=), create_time='2026-01-08T21:40:00Z', creator='bo.cheng@databricks.com', default_source_code_path='', description='example Databricks App to integrate with KA serving endpoint', effective_budget_policy_id='', effective_user_api_scopes=['iam.current-user:read', 'serving.serving-endpoints', 'iam.access-control:read'], id='ee16e127-d9d5-48f0-b350-4c08ca4b0732', oauth2_app_client_id='357c4077-6d38-4907-ae5a-ef850e96eb15', oauth2_app_integration_id='357c4077-6d38-4907-ae5a-ef850e96eb15', pending_deployment=None, resources=[], service_principal_client_id='ee16e127-d9d5-48f0-b350-4c08ca4b0732', service_principal_id=74806834790771, service_principal_name='app-40zbx9 bo-lakebase-memory-app-blog', update_time='2026-01-08T21:42:18Z', updater='bo.cheng@databricks.com', url='https://bo-lakebase-memory-app-blog-1444828305810485.aws.databricksapps.com', user_api_scopes=['serving.serving-endpoints'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from databricks.sdk.service.apps import App\n", + "\n", + "app = App(\n", + " name=dbutils.widgets.get(\"app_name\"),\n", + " description=\"example Databricks App to integrate with KA serving endpoint\",\n", + " user_api_scopes=[\"serving.serving-endpoints\"],\n", + ")\n", + "w.apps.create_and_wait(app=app)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "7b15d88b-89cd-41b0-a6c0-2f3e6e2e4566", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "AppDeployment(create_time='2026-01-08T21:56:59Z', creator='bo.cheng@databricks.com', deployment_artifacts=AppDeploymentArtifacts(source_code_path='/Workspace/Users/ee16e127-d9d5-48f0-b350-4c08ca4b0732/src/01f0ecdcf4ec19b5a9e076ad547e9dda'), deployment_id='01f0ecdcf4ec19b5a9e076ad547e9dda', mode=, source_code_path='/Workspace/Users/bo.cheng@databricks.com/bo-cheng-dnb-demos-personal/lakebase-memory-accelerator/databricks_apps/streamlit-chatbot-app', status=AppDeploymentStatus(message='App started successfully', state=), update_time='2026-01-08T21:57:03Z')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from databricks.sdk.service.apps import AppDeployment\n", + "\n", + "# replace source_code_path with your own\n", + "app_deployment = AppDeployment(\n", + " source_code_path=dbutils.widgets.get(\"source_code_path\")\n", + ")\n", + "w.apps.deploy_and_wait(\n", + " app_name=dbutils.widgets.get(\"app_name\"), app_deployment=app_deployment\n", + ")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "03-deploy-run-databricks-app", + "widgets": { + "app_name": { + "currentValue": "bo-lakebase-memory-app-blog", + "nuid": "4c404360-98df-45e2-a19b-ee5e3175080f", + "typedWidgetInfo": { + "autoCreated": 
false, + "defaultValue": "bo-lakebase-memory-app", + "label": "app_name", + "name": "app_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "bo-lakebase-memory-app", + "label": "app_name", + "name": "app_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "source_code_path": { + "currentValue": "/Workspace/Users/bo.cheng@databricks.com/bo-cheng-dnb-demos-personal/lakebase-memory-accelerator/databricks_apps/streamlit-chatbot-app", + "nuid": "f86777eb-5208-4f0d-8051-37efafd34cbb", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "/Workspace/Users/bo.cheng@databricks.com/bo-cheng-dnb-demos-personal/lakebase-memory-accelerator/databricks_apps/streamlit-chatbot-app", + "label": "source_code_path", + "name": "source_code_path", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "/Workspace/Users/bo.cheng@databricks.com/bo-cheng-dnb-demos-personal/lakebase-memory-accelerator/databricks_apps/streamlit-chatbot-app", + "label": "source_code_path", + "name": "source_code_path", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/LICENSE.md b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/LICENSE.md new file mode 100644 index 0000000..7d2739d --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/LICENSE.md @@ -0,0 +1,24 @@ +# DB license +Definitions. + +Agreement: The agreement between Databricks, Inc., and you governing the use of the Databricks Services, as that term is defined in the Master Cloud Services Agreement (MCSA) located at www.databricks.com/legal/mcsa. + +Licensed Materials: The source code, object code, data, and/or other works to which this license applies. + +Scope of Use. You may not use the Licensed Materials except in connection with your use of the Databricks Services pursuant to the Agreement. Your use of the Licensed Materials must comply at all times with any restrictions applicable to the Databricks Services, generally, and must be used in accordance with any applicable documentation. You may view, use, copy, modify, publish, and/or distribute the Licensed Materials solely for the purposes of using the Licensed Materials within or connecting to the Databricks Services. If you do not agree to these terms, you may not view, use, copy, modify, publish, and/or distribute the Licensed Materials. + +Redistribution. You may redistribute and sublicense the Licensed Materials so long as all use is in compliance with these terms. 
In addition: + +You must give any other recipients a copy of this License; +You must cause any modified files to carry prominent notices stating that you changed the files; +You must retain, in any derivative works that you distribute, all copyright, patent, trademark, and attribution notices, excluding those notices that do not pertain to any part of the derivative works; and +If a "NOTICE" text file is provided as part of its distribution, then any derivative works that you distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the derivative works. +You may add your own copyright statement to your modifications and may provide additional license terms and conditions for use, reproduction, or distribution of your modifications, or for any such derivative works as a whole, provided your use, reproduction, and distribution of the Licensed Materials otherwise complies with the conditions stated in this License. + +Termination. This license terminates automatically upon your breach of these terms or upon the termination of your Agreement. Additionally, Databricks may terminate this license at any time on notice. Upon termination, you must permanently delete the Licensed Materials and all copies thereof. + +DISCLAIMER; LIMITATION OF LIABILITY. + +THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS. DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES, CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS. + +For the latest text of this license: https://www.databricks.com/legal/db-license diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/README.md b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/README.md new file mode 100644 index 0000000..55dc774 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/README.md @@ -0,0 +1,209 @@ +# Lakebase Memory Accelerator + +A comprehensive solution accelerator demonstrating how to build stateful AI agents using Databricks Lakebase (PostgreSQL) and LangGraph for persistent conversation memory and state management. + +## Overview + +This accelerator showcases how to build conversational AI agents that maintain context across multiple interactions using Databricks Lakebase as a checkpoint store. Unlike stateless LLM calls, these agents preserve conversation history and can resume from any point in time using thread IDs. 
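For a quick sense of how a client resumes a thread once the agent is deployed (covered in more detail under Usage Examples below), here is a minimal sketch using the MLflow deployments client; the endpoint name and thread ID are illustrative placeholders, so substitute the serving endpoint you create in the deployment notebook.

```python
# Minimal sketch: resume a conversation against the deployed serving endpoint.
# "memory_agent" and "conversation-123" are placeholders.
from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")
response = client.predict(
    endpoint="memory_agent",  # hypothetical serving endpoint name
    inputs={
        "input": [{"role": "user", "content": "Who committed the latest malware threat?"}],
        "custom_inputs": {"thread_id": "conversation-123"},
    },
)
# Send the next turn with the same thread_id and the agent answers with that stored context.
```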
+ +### Key Features + +- **Persistent Memory**: Conversation state stored in Databricks Lakebase (PostgreSQL) +- **Thread-based Sessions**: Each conversation tracked with unique thread IDs +- **Resumable Conversations**: Pick up where you left off in any conversation +- **Unity Catalog Integration**: Leverage UC functions as agent tools +- **Production-ready Deployment**: Complete MLflow model registration and serving +- **Interactive Chat Interface**: Streamlit-based web application + +## Architecture + +The solution uses: +- **Lakebase**: Managed PostgreSQL for durable agent state storage +- **LangGraph**: State graph framework with PostgreSQL checkpointer +- **MLflow**: Model tracking, registration, and deployment +- **Unity Catalog**: Function toolkit for agent tools +- **Databricks Model Serving**: Production deployment platform + +## Directory Structure + +``` +lakebase-memory-accelerator/ +├── README.md # This file +├── 00-data-uc-function-setup.ipynb # Unity Catalog functions setup +├── 01-lakebase-instance-setup.ipynb # Lakebase PostgreSQL instance creation +├── 02-lakebase-langgraph-checkpointer-agent.ipynb # Main agent implementation +├── agent.py # LangGraph agent class implementation +├── checkpoints-example-query.dbquery.ipynb # Example checkpoint queries +├── data/ # Sample datasets +│ ├── cyber_threat_detection.snappy.parquet # Cybersecurity threat data +│ └── user_info.snappy.parquet # User information data +├── databricks_apps/ # Streamlit web application +│ ├── LICENSE +│ ├── NOTICE +│ ├── README.md +│ └── streamlit-chatbot-app/ +│ ├── app.py # Streamlit chat interface +│ ├── app.yaml # App configuration +│ ├── model_serving_utils.py # Model serving utilities +│ └── requirements.txt # Python dependencies +└── resources/ # Databricks bundle configurations + ├── lakebase_instance.yml # Example DABs Lakebase instance config + ├── short_term_memory_agent_job.yml # Example DABs Job deployment config + └── short_term_memory_app.yml # Example DABs App deployment config +``` + +## Getting Started + +### Prerequisites + +1. **Databricks Workspace** with Unity Catalog enabled +2. **Lakebase Instance** - Create via SQL Warehouses → Lakebase Postgres → Create database instance +3. **Model Serving Permissions** for agent deployment +4. 
**Secret Scope** for storing credentials (default: `dbdemos`) + +### Setup Instructions + +#### Step 1: Data and Functions Setup +Run `00-data-uc-function-setup.ipynb` to: +- Create sample datasets in Unity Catalog +- Set up Unity Catalog functions as agent tools +- Configure cybersecurity threat detection functions + +#### Step 2: Lakebase Instance Setup +Run `01-lakebase-instance-setup.ipynb` to: +- Create Lakebase PostgreSQL instance +- Configure database roles and permissions +- Set up database catalog integration + +#### Step 3: Agent Development and Deployment +Run `02-lakebase-langgraph-checkpointer-agent.ipynb` to: +- Build the stateful LangGraph agent +- Configure databricks-langchain checkpointer +- Test agent locally with conversation threads +- Register model to Unity Catalog +- Deploy to Databricks Model Serving + +#### Step 4: Web Application Deployment +Deploy the Streamlit chat interface: +- Configure thread ID management in sidebar +- Connect to deployed agent endpoint +- Enable persistent conversation sessions + +## Core Components + +### LangGraphResponsesAgent Class + +The main agent implementation (`agent.py`) features: +- **Conversation Checkpointing**: State persistence after each agent step +- **Tool Integration**: Unity Catalog tools + +### Available Tools + +- `get_cyber_threat_info`: Retrieve cybersecurity threat information +- `get_user_info`: Get user details from threat source IPs +- Optional: Vector Search retrieval tools + +## Usage Examples + +### Basic Agent Interaction + +```python +from agent import AGENT + +response = AGENT.predict({ + "input": [{"role": "user", "content": "Who committed the latest malware threat?"}], + "custom_inputs": {"thread_id": "conversation-123"} +}) +``` + +### Resuming Conversations + +```python +# Continue previous conversation using same thread_id +response = AGENT.predict({ + "input": [{"role": "user", "content": "What was their IP address?"}], + "custom_inputs": {"thread_id": "conversation-123"} # Same thread ID +}) +``` + +### Streamlit App Usage + +1. Open the deployed Databricks App +2. Configure thread ID in sidebar (auto-generated or custom) +3. Start conversation with cybersecurity queries +4. Agent maintains context across multiple messages + +## Deployment Options + +### 1. Databricks Model Serving +- Automatic scaling and high availability +- Built-in authentication and authorization +- Integrated monitoring and logging + +### 2. Databricks Apps +- Interactive web interface +- Custom thread ID management +- Real-time conversation experience + +### 3. Job Scheduling +- Automated agent training/updates +- Batch processing capabilities +- Resource optimization + +## Monitoring and Observability + +### Conversation Queries +Use `checkpoints-example-query.dbquery.ipynb` to: +- Analyze conversation patterns +- Debug agent behavior +- Monitor checkpoint storage + +### MLflow Tracking +- Model versioning and lineage +- Performance metrics +- Experiment comparison + +## Security and Governance + +- **Unity Catalog Integration**: Data governance and permissions +- **OAuth Authentication**: Secure Lakebase connections +- **Secret Management**: Encrypted credential storage +- **Audit Logging**: Complete conversation tracking + +## Customization + +### Adding New Tools +1. Create Unity Catalog functions +2. Add to `UC_TOOL_NAMES` list in `agent.py` +3. Update system prompt to include tool usage + +### Modifying Agent Behavior +1. Update `SYSTEM_PROMPT` in configuration +2. Adjust tool selection logic +3.
Customize conversation flow in LangGraph + +## Troubleshooting + +### Common Issues +1. **Thread Management**: Verify thread_id persistence in application state +2. **Tool Permissions**: Check Unity Catalog function access rights +3. **Model Serving**: Validate endpoint deployment and health + +### Debug Resources +- MLflow experiment tracking for model behavior +- Lakebase query logs for connection issues +- Databricks job logs for deployment problems + +## Next Steps + +1. **Production Hardening**: Implement monitoring, alerting, and backup strategies +2. **Advanced Tools**: Add vector search, external APIs, or custom functions +3. **Multi-tenant Support**: Implement user-specific thread isolation +4. **Performance Optimization**: Fine-tune connection pooling and caching + +## Documentation Links + +- [Databricks AI Agent Memory Documentation](https://docs.databricks.com/aws/en/generative-ai/agent-framework/stateful-agents#example-notebook) +- [LangGraph Checkpoint Documentation](https://langchain-ai.github.io/langgraph/concepts/persistence/) +- [Databricks Agent Framework](https://docs.databricks.com/en/generative-ai/agent-framework/) +- [Unity Catalog Functions](https://docs.databricks.com/en/sql/language-manual/sql-ref-functions.html) \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/agent.py b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/agent.py new file mode 100644 index 0000000..0deb2df --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/agent.py @@ -0,0 +1,223 @@ +import logging +import os +import uuid +from typing import Annotated, Any, Generator, Optional, Sequence, TypedDict + +import mlflow +from databricks_langchain import ( + ChatDatabricks, + UCFunctionToolkit, + CheckpointSaver, +) +from databricks.sdk import WorkspaceClient +from langchain_core.messages import AIMessage, AIMessageChunk, AnyMessage +from langchain_core.runnables import RunnableConfig, RunnableLambda +from langgraph.graph import END, StateGraph +from langgraph.graph.message import add_messages +from langgraph.prebuilt.tool_node import ToolNode +from mlflow.pyfunc import ResponsesAgent +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, + ResponsesAgentStreamEvent, + output_to_responses_items_stream, +) + +logger = logging.getLogger(__name__) +logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO")) + +############################################ +# Define your LLM endpoint and system prompt +############################################ +# TODO: Replace with your model serving endpoint +LLM_ENDPOINT_NAME = "databricks-gpt-5-2" + +# TODO: Update with your system prompt +SYSTEM_PROMPT = """ + You are an cybersecurity assistant. + You are given a task and you must complete it. + Use the following routine to support the customer. + # Routine: + 1. Provide the get_cyber_threat_info tool the type of threat being asked about. + 2. Use the source ip address provided in step 1 as input for the get_user_info tool to retrieve user specific info. 
+ Use the following tools to complete the task: + {tools}""" + +############################################ +# Lakebase configuration +############################################ +# TODO: Fill in Lakebase instance name +LAKEBASE_INSTANCE_NAME = "bo-test-lakebase-3" + +############################################################################### +## Define tools for your agent, enabling it to retrieve data or take actions +## beyond text generation +## To create and see usage examples of more tools, see +## https://docs.databricks.com/en/generative-ai/agent-framework/agent-tool.html +############################################################################### +tools = [] + +# Example UC tools; add your own as needed +UC_TOOL_NAMES: list[str] = [ + "bo_cheng_dnb_demos.agents.get_cyber_threat_info", + "bo_cheng_dnb_demos.agents.get_user_info", +] +if UC_TOOL_NAMES: + uc_toolkit = UCFunctionToolkit(function_names=UC_TOOL_NAMES) + tools.extend(uc_toolkit.tools) + +# Use Databricks vector search indexes as tools +# See https://docs.databricks.com/en/generative-ai/agent-framework/unstructured-retrieval-tools.html#locally-develop-vector-search-retriever-tools-with-ai-bridge +# List to store vector search tool instances for unstructured retrieval. +VECTOR_SEARCH_TOOLS = [] + +# To add vector search retriever tools, +# construct a VectorSearchRetrieverTool for each index, +# then append it to VECTOR_SEARCH_TOOLS. +# Example: +# VECTOR_SEARCH_TOOLS.append( +# VectorSearchRetrieverTool( +# index_name="", +# # filters="..." +# ) +# ) + +tools.extend(VECTOR_SEARCH_TOOLS) + +##################### +## Define agent logic +##################### + + +class AgentState(TypedDict): + messages: Annotated[Sequence[AnyMessage], add_messages] + custom_inputs: Optional[dict[str, Any]] + custom_outputs: Optional[dict[str, Any]] + + +class LangGraphResponsesAgent(ResponsesAgent): + """Stateful agent using ResponsesAgent with pooled Lakebase checkpointing.""" + + def __init__(self, lakebase_instance_name: str): + self.workspace_client = WorkspaceClient() + + self.model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME) + self.system_prompt = SYSTEM_PROMPT + self.model_with_tools = self.model.bind_tools(tools) if tools else self.model + + def _create_graph(self, checkpointer: Any): + def should_continue(state: AgentState): + messages = state["messages"] + last_message = messages[-1] + if isinstance(last_message, AIMessage) and last_message.tool_calls: + return "continue" + return "end" + + preprocessor = ( + RunnableLambda( + lambda state: [{"role": "system", "content": self.system_prompt}] + + state["messages"] + ) + if self.system_prompt + else RunnableLambda(lambda state: state["messages"]) + ) + model_runnable = preprocessor | self.model_with_tools + + def call_model(state: AgentState, config: RunnableConfig): + response = model_runnable.invoke(state, config) + return {"messages": [response]} + + workflow = StateGraph(AgentState) + workflow.add_node("agent", RunnableLambda(call_model)) + + if tools: + workflow.add_node("tools", ToolNode(tools)) + workflow.add_conditional_edges( + "agent", should_continue, {"continue": "tools", "end": END} + ) + workflow.add_edge("tools", "agent") + else: + workflow.add_edge("agent", END) + + workflow.set_entry_point("agent") + return workflow.compile(checkpointer=checkpointer) + + def _get_or_create_thread_id(self, request: ResponsesAgentRequest) -> str: + """Get thread_id from request or create a new one. + + Priority: + 1. Use thread_id from custom_inputs if present + 2. 
Use conversation_id from chat context if available + 3. Generate a new UUID + + Returns: + thread_id: The thread identifier to use for this conversation + """ + ci = dict(request.custom_inputs or {}) + + if "thread_id" in ci: + return ci["thread_id"] + + # using conversation id from chat context as thread id + # https://mlflow.org/docs/latest/api_reference/python_api/mlflow.types.html#mlflow.types.agent.ChatContext + if request.context and getattr(request.context, "conversation_id", None): + return request.context.conversation_id + + # Generate new thread_id + return str(uuid.uuid4()) + + def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse: + outputs = [ + event.item + for event in self.predict_stream(request) + if event.type == "response.output_item.done" + ] + return ResponsesAgentResponse( + output=outputs, custom_outputs=request.custom_inputs + ) + + def predict_stream( + self, request: ResponsesAgentRequest + ) -> Generator[ResponsesAgentStreamEvent, None, None]: + thread_id = self._get_or_create_thread_id(request) + ci = dict(request.custom_inputs or {}) + ci["thread_id"] = thread_id + request.custom_inputs = ci + + # Convert incoming Responses messages to ChatCompletions format + # LangChain will automatically convert from ChatCompletions to LangChain format + cc_msgs = self.prep_msgs_for_cc_llm([i.model_dump() for i in request.input]) + langchain_msgs = cc_msgs + checkpoint_config = {"configurable": {"thread_id": thread_id}} + + with CheckpointSaver(instance_name=LAKEBASE_INSTANCE_NAME) as checkpointer: + graph = self._create_graph(checkpointer) + + for event in graph.stream( + {"messages": langchain_msgs}, + checkpoint_config, + stream_mode=["updates", "messages"], + ): + if event[0] == "updates": + for node_data in event[1].values(): + if len(node_data.get("messages", [])) > 0: + yield from output_to_responses_items_stream( + node_data["messages"] + ) + elif event[0] == "messages": + try: + chunk = event[1][0] + if isinstance(chunk, AIMessageChunk) and chunk.content: + yield ResponsesAgentStreamEvent( + **self.create_text_delta( + delta=chunk.content, item_id=chunk.id + ), + ) + except Exception as exc: + logger.error("Error streaming chunk: %s", exc) + + +# ----- Export model ----- +mlflow.langchain.autolog() +AGENT = LangGraphResponsesAgent(LAKEBASE_INSTANCE_NAME) +mlflow.models.set_model(AGENT) diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/checkpoints-example-query.dbquery.ipynb b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/checkpoints-example-query.dbquery.ipynb new file mode 100644 index 0000000..b5ed54b --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/checkpoints-example-query.dbquery.ipynb @@ -0,0 +1,54 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "e0badf2e-d84d-4c82-bd12-f8c7fd45b943", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "SELECT\n", + " thread_id,\n", + " metadata,\n", + " checkpoint,\n", + " checkpoint_id parent_checkpoint_id,\n", + " checkpoint_ns,\n", + " type\n", + "FROM\n", + " `lakebase-catalog-name`.public.checkpoints;" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "sql", + 
"notebookMetadata": { + "pythonIndentUnit": 2, + "sqlQueryOptions": { + "applyAutoLimit": true, + "catalog": "bo-test-lakebase-catalog", + "schema": "public" + } + }, + "notebookName": "checkpoints-example-query.dbquery.ipynb", + "widgets": {} + }, + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/cyber_threat_detection.snappy.parquet b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/cyber_threat_detection.snappy.parquet new file mode 100644 index 0000000..4dd6fa0 Binary files /dev/null and b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/cyber_threat_detection.snappy.parquet differ diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/user_info.snappy.parquet b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/user_info.snappy.parquet new file mode 100644 index 0000000..ee0c74b Binary files /dev/null and b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/data/user_info.snappy.parquet differ diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/.gitignore b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/.gitignore new file mode 100644 index 0000000..7f6a479 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/.gitignore @@ -0,0 +1,142 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Visual Studio Code +.vscode/ + +# IntelliJ +.idea/ +ml-models.iws +ml-models.iml +ml-models.ipr + +.DS_Store +.databricks +.gradio diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/LICENSE b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/LICENSE new file mode 100644 index 0000000..7b7d6e2 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/LICENSE @@ -0,0 +1,22 @@ +DB license +Definitions. + +Agreement: The agreement between Databricks, Inc., and you governing the use of the Databricks Services, as that term is defined in the Master Cloud Services Agreement (MCSA) located at www.databricks.com/legal/mcsa. + +Licensed Materials: The source code, object code, data, and/or other works to which this license applies. + +Scope of Use. You may not use the Licensed Materials except in connection with your use of the Databricks Services pursuant to the Agreement. Your use of the Licensed Materials must comply at all times with any restrictions applicable to the Databricks Services, generally, and must be used in accordance with any applicable documentation. You may view, use, copy, modify, publish, and/or distribute the Licensed Materials solely for the purposes of using the Licensed Materials within or connecting to the Databricks Services. If you do not agree to these terms, you may not view, use, copy, modify, publish, and/or distribute the Licensed Materials. + +Redistribution. You may redistribute and sublicense the Licensed Materials so long as all use is in compliance with these terms. In addition: + +You must give any other recipients a copy of this License; +You must cause any modified files to carry prominent notices stating that you changed the files; +You must retain, in any derivative works that you distribute, all copyright, patent, trademark, and attribution notices, excluding those notices that do not pertain to any part of the derivative works; and +If a "NOTICE" text file is provided as part of its distribution, then any derivative works that you distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the derivative works. +You may add your own copyright statement to your modifications and may provide additional license terms and conditions for use, reproduction, or distribution of your modifications, or for any such derivative works as a whole, provided your use, reproduction, and distribution of the Licensed Materials otherwise complies with the conditions stated in this License. + +Termination. This license terminates automatically upon your breach of these terms or upon the termination of your Agreement. Additionally, Databricks may terminate this license at any time on notice. Upon termination, you must permanently delete the Licensed Materials and all copies thereof. + +DISCLAIMER; LIMITATION OF LIABILITY. + +THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS. 
DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES, CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS. \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/NOTICE b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/NOTICE new file mode 100644 index 0000000..28f2019 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/NOTICE @@ -0,0 +1,34 @@ +Copyright (2024) Databricks, Inc. + +This Software includes software developed at Databricks (https://www.databricks.com/) and its use is subject to the included LICENSE file. + +________________ +This Software contains code from the following open source projects, licensed under the Apache 2.0 license: + +gradio-app/gradio - https://github.com/gradio-app/gradio +Copyright gradio authors +License - https://github.com/gradio-app/gradio/blob/main/LICENSE + +streamlit/streamlit - https://github.com/streamlit/streamlit +Copyright streamlit authors +License - https://github.com/streamlit/streamlit/blob/develop/LICENSE +Notice - https://github.com/streamlit/streamlit/blob/develop/NOTICES + +databricks/databricks-sdk-py - https://github.com/databricks/databricks-sdk-py +Copyright (2023) Databricks, Inc. 
+License - https://github.com/databricks/databricks-sdk-py/blob/main/LICENSE +Notice - https://github.com/databricks/databricks-sdk-py/blob/main/NOTICE + +________________ +This Software contains code from the following open source projects, licensed under the MIT license: + +plotly/dash - https://github.com/plotly/dash +Copyright (c) 2023 Plotly, Inc +License - https://github.com/plotly/dash/blob/dev/LICENSE + +________________ +This Software contains code from the following open source projects, licensed under the BSD 3-clause license: + +pallets/flask - https://github.com/pallets/flask +Copyright 2010 Pallets +License - https://github.com/pallets/flask/blob/main/LICENSE.txt diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/README.md b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/README.md new file mode 100644 index 0000000..47367f9 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/README.md @@ -0,0 +1 @@ +# app-templates \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/.streamlit/config.toml b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/.streamlit/config.toml new file mode 100644 index 0000000..864c27a --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/.streamlit/config.toml @@ -0,0 +1,25 @@ +[server] +enableStaticServing = true + +[theme] +primaryColor = "#cb785c" +backgroundColor = "#fdfdf8" +secondaryBackgroundColor = "#ecebe3" +textColor = "#3d3a2a" +linkColor = "#3d3a2a" +borderColor = "#d3d2ca" +showWidgetBorder = true +baseRadius = "0.75rem" +buttonRadius = "full" +headingFontWeights = [600,500,500,500,500,500] +headingFontSizes = ["3rem", "2rem"] +codeFontSize = ".75rem" +codeBackgroundColor = "#ecebe4" +showSidebarBorder = true +chartCategoricalColors = ["#0ea5e9", "#059669", "#fbbf24"] + +[theme.sidebar] +backgroundColor = "#f0f0ec" +secondaryBackgroundColor = "#ecebe3" +headingFontSizes = ["1.6rem", "1.4rem", "1.2rem"] +dataframeHeaderBackgroundColor = "#e4e4e0" \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.py b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.py new file mode 100644 index 0000000..68ffbb7 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.py @@ -0,0 +1,118 @@ +import logging +import os +import streamlit as st +import time +import uuid +from typing import Generator +from model_serving_utils import query_endpoint, is_endpoint_supported + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Ensure environment variable is set correctly +SERVING_ENDPOINT = os.getenv("SERVING_ENDPOINT") +assert SERVING_ENDPOINT, ( + "Unable to determine serving endpoint to use for chatbot app. If developing locally, " + "set the SERVING_ENDPOINT environment variable to the name of your serving endpoint. 
If " + "deploying to a Databricks app, include a serving endpoint resource named " + "'serving_endpoint' with CAN_QUERY permissions, as described in " + "https://docs.databricks.com/aws/en/generative-ai/agent-framework/chat-app#deploy-the-databricks-app" +) + +# Check if the endpoint is supported +endpoint_supported = is_endpoint_supported(SERVING_ENDPOINT) + +# Configure Streamlit page +st.set_page_config( + page_title="Databricks Cybersecurity Agent", + page_icon="🛡", + layout="centered", +) + + +def get_user_info(): + headers = st.context.headers + return dict( + user_name=headers.get("X-Forwarded-Preferred-Username"), + user_email=headers.get("X-Forwarded-Email"), + user_id=headers.get("X-Forwarded-User"), + ) + + +user_info = get_user_info() + +# Streamlit app +if "visibility" not in st.session_state: + st.session_state.visibility = "visible" + st.session_state.disabled = False + +# Initialization +if "thread_id" not in st.session_state: + st.session_state["thread_id"] = str(uuid.uuid4()) + +st.title("🛡 Databricks Cybersecurity Agent") +st.markdown( + """ + This AI assistant is powered by Databricks Foundation Models and features: + - 💾 Conversation memory with Lakebase + """ +) + +# Sidebar for thread configuration +with st.sidebar: + st.header("Configuration") + thread_id = st.text_input( + "Thread ID", value=None, help="Unique identifier for this conversation thread" + ) + +# Check if endpoint is supported and show appropriate UI +if not endpoint_supported: + st.error("⚠️ Unsupported Endpoint Type") + st.markdown( + f"The endpoint `{SERVING_ENDPOINT}` is not compatible with this basic chatbot template.\n\n" + "This template only supports chat completions-compatible endpoints.\n\n" + "👉 **For a richer chatbot template** that supports all conversational endpoints on Databricks, " + "please see the [Databricks documentation](https://docs.databricks.com/aws/en/generative-ai/agent-framework/chat-app)." + ) +else: + # st.markdown( + # "ℹ️ This is a simple example. See " + # "[Databricks docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/chat-app) " + # "for a more comprehensive example with streaming output and more." 
+ # ) + + # Initialize chat history + if "messages" not in st.session_state: + st.session_state.messages = [] + + # Display chat messages from history on app rerun + for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + # Accept user input + if prompt := st.chat_input("What cybersecurity event are you concerned about?"): + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": prompt}) + # Display user message in chat message container + with st.chat_message("user"): + st.markdown(prompt) + + # Display assistant response in chat message container + with st.chat_message("assistant"): + if thread_id is None: + thread_id = st.session_state["thread_id"] + # Query the Databricks serving endpoint + assistant_response = query_endpoint( + endpoint_name=SERVING_ENDPOINT, + messages=[st.session_state.messages[-1]], + max_tokens=400, + thread_id=thread_id, + )["text"] + st.markdown(assistant_response) + + # Add assistant response to chat history + st.session_state.messages.append( + {"role": "assistant", "content": assistant_response} + ) diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.yaml b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.yaml new file mode 100644 index 0000000..ab0b3f3 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/app.yaml @@ -0,0 +1,11 @@ +command: [ + "streamlit", + "run", + "app.py" +] + +env: + - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS + value: "false" + - name: "SERVING_ENDPOINT" + value: "agents_bo_cheng_dnb_demos-agents-memory_agent" diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/model_serving_utils.py b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/model_serving_utils.py new file mode 100644 index 0000000..48e91a1 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/model_serving_utils.py @@ -0,0 +1,63 @@ +from mlflow.deployments import get_deploy_client +from databricks.sdk import WorkspaceClient +import uuid + + +def _get_endpoint_task_type(endpoint_name: str) -> str: + """Get the task type of a serving endpoint.""" + w = WorkspaceClient() + ep = w.serving_endpoints.get(endpoint_name) + return ep.task + + +def is_endpoint_supported(endpoint_name: str) -> bool: + """Check if the endpoint has a supported task type.""" + task_type = _get_endpoint_task_type(endpoint_name) + supported_task_types = ["agent/v1/chat", "agent/v2/chat", "llm/v1/chat", "agent/v1/responses"] + return task_type in supported_task_types + + +def _validate_endpoint_task_type(endpoint_name: str) -> None: + """Validate that the endpoint has a supported task type.""" + if not is_endpoint_supported(endpoint_name): + raise Exception( + f"Detected unsupported endpoint type for this basic chatbot template. " + f"This chatbot template only supports chat completions-compatible endpoints. 
" + f"For a richer chatbot template with support for all conversational endpoints on Databricks, " + f"see https://docs.databricks.com/aws/en/generative-ai/agent-framework/chat-app" + ) + + +def _query_endpoint( + endpoint_name: str, messages: list[dict[str, str]], max_tokens, thread_id +) -> list[dict[str, str]]: + """Calls a model serving endpoint.""" + _validate_endpoint_task_type(endpoint_name) + if thread_id is None: + thread_id = str(uuid.uuid4()) + res = get_deploy_client("databricks").predict( + endpoint=endpoint_name, + inputs={ + "input": messages, + "max_tokens": max_tokens, + "custom_inputs": {"thread_id": thread_id}, + "temperature": 0.1, + }, + ) + if "output" in res: + return res["output"][-1]["content"] + raise Exception( + "This app can only run against:" + "1) Databricks foundation model or external model endpoints with the chat task type (described in https://docs.databricks.com/aws/en/machine-learning/model-serving/score-foundation-models#chat-completion-model-query)" + "2) Databricks agent serving endpoints that implement the conversational agent schema documented " + "in https://docs.databricks.com/aws/en/generative-ai/agent-framework/author-agent" + ) + + +def query_endpoint(endpoint_name, messages, max_tokens, thread_id): + """ + Query a chat-completions or agent serving endpoint + If querying an agent serving endpoint that returns multiple messages, this method + returns the last message + .""" + return _query_endpoint(endpoint_name, messages, max_tokens, thread_id)[-1] diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/requirements.txt b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/requirements.txt new file mode 100644 index 0000000..f0734bb --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/databricks_apps/streamlit-chatbot-app/requirements.txt @@ -0,0 +1,3 @@ +mlflow>=2.21.2 +streamlit==1.44.1 +databricks-sdk diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/lakebase_instance.yml b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/lakebase_instance.yml new file mode 100644 index 0000000..f5e0eb0 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/lakebase_instance.yml @@ -0,0 +1,13 @@ +# resources: +# database_instances: +# my_instance: +# name: lakebase-instance-name +# capacity: CU_1 +# # below defaults to true anyways +# # enable_pg_native_login: true +# database_catalogs: +# my_catalog: +# database_instance_name: ${resources.database_instances.my_instance.name} +# name: lakebase-catalog-name +# database_name: my_database +# create_database_if_not_exists: true \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_agent_job.yml b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_agent_job.yml new file mode 100644 index 0000000..98be2e6 --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_agent_job.yml @@ -0,0 +1,24 @@ +# resources: +# jobs: +# bo_lakebase_agent: +# name: lakebase_checkpointer_agent +# tasks: +# - task_key: 02_lakebase_langgraph_checkpointer_agent +# notebook_task: +# notebook_path: ../02-lakebase-langgraph-checkpointer-agent.ipynb +# base_parameters: +# 
catalog: catalog +# model: memory_agent +# schema: schema +# secret_scope: dbdemos +# source: WORKSPACE +# environment_key: env1 +# tags: +# Bundle: short-term-memory +# DeployedBy: DAB +# Environment: Test +# environments: +# - environment_key: env1 +# spec: +# environment_version: "2" +# performance_target: PERFORMANCE_OPTIMIZED \ No newline at end of file diff --git a/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_app.yml b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_app.yml new file mode 100644 index 0000000..108e44d --- /dev/null +++ b/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase/resources/short_term_memory_app.yml @@ -0,0 +1,12 @@ +# resources: +# apps: +# short_term_memory_example: +# name: "bo-cheng-lakebase-agent-app" +# description: "A short term memory agent app" +# # The location of the source code for the app +# source_code_path: "../databricks_apps/streamlit-chatbot-app" +# user_api_scopes: +# - serving.serving-endpoints +# sync: +# include: +# - "../databricks_apps/streamlit-chatbot-app/*" diff --git a/CODEOWNERS b/CODEOWNERS index 79f580d..c88d2d9 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -29,3 +29,4 @@ /2025-11-agentic-ai-pension-advisor/* @pravinva /2025-12-surfacing-lakebase-tables-in-databricks-apps @sylvia-222 /2025-12-transformWithState-AdAttribution @craig-db +/2026-01-how-to-build-ai-agents-with-conversation-memory-using-lakebase @bcheng004 \ No newline at end of file
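
For reference, here is a minimal client-side sketch of resuming a checkpointed conversation against the deployed serving endpoint, mirroring the call pattern in `model_serving_utils.py`. The endpoint name is taken from `app.yaml` in this change, the thread id is a placeholder, and the output parsing assumes the responses-agent shape used by the app code; adjust all three for your own deployment.

```python
import uuid

from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")

# Placeholder thread id; reuse the same value across calls to resume the conversation.
thread_id = str(uuid.uuid4())

# Endpoint name taken from app.yaml in this repo; replace with your deployed endpoint.
endpoint = "agents_bo_cheng_dnb_demos-agents-memory_agent"


def ask(question: str) -> str:
    """Send one user turn; prior turns are restored by the Lakebase checkpointer for this thread_id."""
    res = client.predict(
        endpoint=endpoint,
        inputs={
            "input": [{"role": "user", "content": question}],
            "custom_inputs": {"thread_id": thread_id},
        },
    )
    # Assumes the responses-agent output shape used by model_serving_utils.py:
    # the last output item's last content part carries the text.
    return res["output"][-1]["content"][-1]["text"]


print(ask("Who committed the latest malware threat?"))
print(ask("What was their IP address?"))  # follow-up resolved via conversation memory
```

Because only the new user turn is sent, any cross-turn context comes entirely from the Lakebase checkpoint keyed by `thread_id`.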