import pandas as pd
import psycopg
from psycopg import sql
from databricks.sdk import WorkspaceClient

import streamlit as st

st.header("Lakebase Postgres database", divider=True)
st.subheader("Read a table")
st.write(
    "This app connects to a [Databricks Lakebase](https://docs.databricks.com/aws/en/oltp/) OLTP database instance and reads the first 100 rows from any table. "
    "Provide the instance name, database, schema, and table name."
)


w = WorkspaceClient()


def get_connection(host: str, database: str, user: str) -> psycopg.Connection:
    """Get a connection to the Lakebase database using an OAuth token."""
    # A new connection is opened per query, so each one authenticates with a
    # freshly issued (short-lived) OAuth token as the Postgres password.
    token = w.config.oauth_token().access_token

    return psycopg.connect(
        host=host,
        port=5432,
        dbname=database,
        user=user,
        password=token,
        sslmode="require",
    )


def query_df(
    host: str, database: str, user: str, query: str | sql.Composed
) -> pd.DataFrame:
| 33 | + """Execute a SQL query and return results as a DataFrame.""" |
| 34 | + conn = get_connection(host, database, user) |
| 35 | + try: |
| 36 | + with conn.cursor() as cur: |
| 37 | + cur.execute(sql) |
| 38 | + if not cur.description: |
| 39 | + return pd.DataFrame() |
| 40 | + |
| 41 | + cols = [d.name for d in cur.description] |
| 42 | + rows = cur.fetchall() |
| 43 | + return pd.DataFrame(rows, columns=cols) |
| 44 | + finally: |
| 45 | + conn.close() |
| 46 | + |
| 47 | + |
| 48 | +tab_try, tab_code, tab_reqs = st.tabs( |
| 49 | + ["**Try it**", "**Code snippet**", "**Requirements**"] |
| 50 | +) |
| 51 | + |
| 52 | +with tab_try: |
| 53 | + instance_names = [i.name for i in w.database.list_database_instances()] |
| 54 | + instance_name = st.selectbox("Database instance:", instance_names) |
| 55 | + database = st.text_input("Database:", value="databricks_postgres") |
| 56 | + schema = st.text_input("Schema:", value="public") |
| 57 | + table = st.text_input("Table:", value="your_table_name") |
| 58 | + |
| 59 | + # Get user and host |
| 60 | + user = w.config.client_id or w.current_user.me().user_name |
| 61 | + host = "" |
| 62 | + if instance_name: |
| 63 | + host = w.database.get_database_instance(name=instance_name).read_write_dns |
| 64 | + |
| 65 | + if st.button("Read table"): |
| 66 | + if not all([instance_name, host, database, schema, table]): |
| 67 | + st.error("Please provide all required fields.") |
| 68 | + else: |
| 69 | + df = query_df( |
| 70 | + host, database, user, f"SELECT * FROM {schema}.{table} LIMIT 100" |
| 71 | + ) |
| 72 | + st.dataframe(df, use_container_width=True) |
| 73 | + st.caption(f"Showing first 100 rows from {schema}.{table}") |
| 74 | + |
| 75 | +with tab_code: |
    st.code(
        '''import os
import pandas as pd
import psycopg
from databricks.sdk import WorkspaceClient
import streamlit as st


w = WorkspaceClient()


def get_connection(host: str, database: str, user: str) -> psycopg.Connection:
    """Get a connection to the Lakebase database using an OAuth token."""
    token = w.config.oauth_token().access_token

    return psycopg.connect(
        host=host,
        port=5432,
        dbname=database,
        user=user,
        password=token,
        sslmode="require",
    )


def query_df(host: str, database: str, user: str, query: str) -> pd.DataFrame:
    """Execute a SQL query and return the results as a DataFrame."""
    conn = get_connection(host, database, user)
    try:
        with conn.cursor() as cur:
            cur.execute(query)
            if not cur.description:
                return pd.DataFrame()

            cols = [d.name for d in cur.description]
            rows = cur.fetchall()
            return pd.DataFrame(rows, columns=cols)
    finally:
        conn.close()


# Get connection parameters from environment variables (set by Databricks Apps)
# or fall back to manual configuration.
host = os.getenv("PGHOST")
database = os.getenv("PGDATABASE")
user = os.getenv("PGUSER")

if not all([host, database, user]):
    # Manual configuration if environment variables are not set.
    instance_name = "your_instance_name"
    database = "databricks_postgres"
    user = w.config.client_id or w.current_user.me().user_name
    host = w.database.get_database_instance(name=instance_name).read_write_dns

# Query the table.
schema = "public"
table = "your_table_name"
df = query_df(host, database, user, f"SELECT * FROM {schema}.{table} LIMIT 100")
st.dataframe(df)
''',
        language="python",
    )

with tab_reqs:
    st.info(
        "💡 **Tip:** Add your Lakebase instance as an App resource to automatically configure connection parameters via environment variables. "
        "See the [Lakebase resource documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/lakebase) for details."
    )

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown(
            """
            **Permissions (app service principal)**
            * Add the Lakebase instance as an [**App resource**](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/lakebase) to automatically configure permissions and environment variables (`PGHOST`, `PGDATABASE`, `PGUSER`, etc.).
            * Alternatively, manually create a Postgres role for the service principal. See [this guide](https://docs.databricks.com/aws/en/oltp/pg-roles?language=PostgreSQL#create-postgres-roles-and-grant-privileges-for-databricks-identities).
            * Example grants for read access (a schema-wide variant is noted below):
            """
        )
        st.code(
            """
GRANT CONNECT ON DATABASE databricks_postgres TO "<service-principal-id>";
GRANT USAGE ON SCHEMA public TO "<service-principal-id>";
GRANT SELECT ON TABLE your_table_name TO "<service-principal-id>";
            """,
            language="sql",
        )
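        # Optional note: standard Postgres also allows one schema-wide grant in
        # place of per-table statements (a convenience, not taken from the
        # linked guide).
        st.caption(
            "To cover every existing table in the schema at once, run "
            '`GRANT SELECT ON ALL TABLES IN SCHEMA public TO "<service-principal-id>";`.'
        )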

    with col2:
        st.markdown(
            """
            **Databricks resources**
            * [Lakebase](https://docs.databricks.com/aws/en/oltp/) database instance (Postgres).
            * An existing Postgres database, schema, and table with data.
            """
        )

    with col3:
        st.markdown(
            """
            **Dependencies**
            * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk>=0.60.0`
            * [Psycopg](https://pypi.org/project/psycopg/) - `psycopg[binary]`
            * [Pandas](https://pypi.org/project/pandas/) - `pandas`
            * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
            """
        )
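        # The same list as a copy-pasteable requirements.txt. Only the SDK
        # floor (>=0.60.0) comes from this page; leaving the other packages
        # unpinned is an assumption.
        st.code(
            """databricks-sdk>=0.60.0
psycopg[binary]
pandas
streamlit""",
            language="text",
        )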

    st.info(
        "[This guide](https://docs.databricks.com/aws/en/oltp/query/sql-editor#create-a-new-query) "
        "shows how to query your Lakebase instance from the SQL editor."
    )

    st.warning(
        "⚠️ OAuth tokens are short-lived; this app requests a fresh token for "
        "each new connection and enforces TLS (`sslmode=require`)."
    )