Commit 83be1c9
Merge pull request #11 from scottDBX1886/main
Modification to Table Read and Edit
2 parents 13e4aac + c4c1ac8 commit 83be1c9

4 files changed (+142, -33 lines)

streamlit/view_groups.py
Lines changed: 12 additions & 0 deletions

@@ -133,4 +133,16 @@
             },
         ],
     },
+
+    {
+        "title": "Unity Catalog",
+        "views": [
+            {
+                "label": "Get Catalogs",
+                "help": "Get metadata.",
+                "page": "views/unity_catalog_get.py",
+                "icon": ":material/lan:",
+            },
+        ],
+    }
 ]
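
For context, `view_groups.py` registers each page group for the app's navigation. Below is a minimal sketch of how an entry like the one added here might be consumed; it assumes (this is not shown in the diff) that the app builds its sidebar with `st.Page` and `st.navigation`:

```python
import streamlit as st

# Hypothetical consumer of the view_groups entry added above;
# the real wiring in this repo lives outside this diff.
view_groups = [
    {
        "title": "Unity Catalog",
        "views": [
            {
                "label": "Get Catalogs",
                "help": "Get metadata.",
                "page": "views/unity_catalog_get.py",
                "icon": ":material/lan:",
            },
        ],
    },
]

# One navigation section per group, one st.Page per view.
pages = {
    group["title"]: [
        st.Page(view["page"], title=view["label"], icon=view["icon"])
        for view in group["views"]
    ]
    for group in view_groups
}

st.navigation(pages).run()
```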

streamlit/views/embed_dashboard.py
Lines changed: 40 additions & 5 deletions

@@ -1,5 +1,7 @@
 import streamlit as st
 import streamlit.components.v1 as components
+import requests
+from databricks.sdk.core import Config
 
 st.header("Data Visualization", divider=True)
 st.subheader("AI/BI Dashboard")
@@ -12,13 +14,46 @@
 
 
 with tab_a:
-    iframe_source = st.text_input(
-        "Embed the dashboard:",
-        placeholder="https://dbc-f0e9b24f-3d49.cloud.databricks.com/embed/dashboardsv3/01eff8112e9411cd930f0ae0d2c6b63d?o=37581543725667790",
-        help="Copy and paste the URL from the dashboard UI Share -> Embed iframe.",
+
+
+    cfg = Config()
+
+    host = cfg.host
+
+    token = list(cfg.authenticate().values())[0].split(" ")[1]
+    url = f"{host}/api/2.0/lakeview/dashboards"
+    headers = {
+        "Authorization": f"Bearer {token}"
+    }
+
+    response = requests.get(url, headers=headers)
+    dashboards = response.json()
+    dashboard_paths = {dashboard['display_name']: dashboard['dashboard_id'] for dashboard in dashboards['dashboards']}
+
+    published_dashboards = []
+
+    for dashboard in dashboards['dashboards']:
+        dashboard_id = dashboard['dashboard_id']
+
+        published_url = f"{host}/api/2.0/lakeview/dashboards/{dashboard_id}/published"
+        response = requests.get(published_url, headers=headers)
+
+        if response.status_code == 200:
+            published_dashboards.append((dashboard['display_name'], dashboard['dashboard_id']))
+            print(dashboard['display_name'] + ' ' + dashboard['dashboard_id'])
+    final_published_dashboards = {k: v for k, v in published_dashboards}
+
+    # st.info(final_published_dashboards)
+    iframe_source_temp = st.selectbox(
+        "Select your AI/BI Dashboard:", [""] + list(final_published_dashboards.keys()),
+        help="Dashboard list populated from your workspace using app service principal.",
     )
+
+    dashboard_id = final_published_dashboards.get(iframe_source_temp)
 
-    if iframe_source:
+    if iframe_source_temp and iframe_source_temp != "":
+        iframe_source = f"{host}/embed/dashboardsv3/{dashboard_id}"
+        # st.info(iframe_source)
         components.iframe(src=iframe_source, width=700, height=600, scrolling=True)
 
 with tab_b:
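
Two things worth noting in the new listing code: the bearer token is pulled out of the SDK's auth header, and the app probes `GET .../published` once per dashboard on every rerun. A minimal sketch of the same logic factored into a cached helper follows; the `page_token`/`next_page_token` pagination fields are an assumption about the Lakeview list endpoint (the diff reads only the first page), and the status-code probe mirrors the diff:

```python
import requests
import streamlit as st
from databricks.sdk.core import Config

cfg = Config()


@st.cache_data(ttl=300)  # cache across reruns so every dashboard isn't re-probed
def list_published_dashboards() -> dict:
    """Return {display_name: dashboard_id} for published dashboards only."""
    # Same token extraction as the diff: cfg.authenticate() yields an
    # {"Authorization": "Bearer <token>"} header that gets split apart.
    token = list(cfg.authenticate().values())[0].split(" ")[1]
    headers = {"Authorization": f"Bearer {token}"}

    published = {}
    page_token = None
    while True:
        # Assumed pagination fields; not exercised by the diff itself.
        params = {"page_token": page_token} if page_token else {}
        resp = requests.get(
            f"{cfg.host}/api/2.0/lakeview/dashboards", headers=headers, params=params
        )
        resp.raise_for_status()
        body = resp.json()
        for d in body.get("dashboards", []):
            # As in the diff: a dashboard counts as published when its
            # /published endpoint answers 200.
            check = requests.get(
                f"{cfg.host}/api/2.0/lakeview/dashboards/{d['dashboard_id']}/published",
                headers=headers,
            )
            if check.status_code == 200:
                published[d["display_name"]] = d["dashboard_id"]
        page_token = body.get("next_page_token")
        if not page_token:
            break
    return published
```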

streamlit/views/tables_edit.py
Lines changed: 52 additions & 17 deletions

@@ -2,6 +2,7 @@
 import streamlit as st
 from databricks import sql
 from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
 
 
 st.header(body="Tables", divider=True)
@@ -14,6 +15,14 @@
 
 cfg = Config()
 
+w = WorkspaceClient()
+
+warehouses = w.warehouses.list()
+
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+catalogs = w.catalogs.list()
+
 
 @st.cache_resource
 def get_connection(http_path):
@@ -24,12 +33,23 @@ def get_connection(http_path):
     )
 
 
-def read_table(table_name: str, conn) -> pd.DataFrame:
+def read_table(table_name, conn):
     with conn.cursor() as cursor:
-        cursor.execute(f"SELECT * FROM {table_name}")
+        query = f"SELECT * FROM {table_name}"
+        cursor.execute(query)
         return cursor.fetchall_arrow().to_pandas()
 
 
+def get_schema_names(catalog_name):
+    schemas = w.schemas.list(catalog_name=catalog_name)
+    return [schema.name for schema in schemas]
+
+
+def get_table_names(catalog_name, schema_name):
+    tables = w.tables.list(catalog_name=catalog_name, schema_name=schema_name)
+    return [table.name for table in tables]
+
+
 def insert_overwrite_table(table_name: str, df: pd.DataFrame, conn):
     progress = st.empty()
     with conn.cursor() as cursor:
@@ -45,26 +65,40 @@ def insert_overwrite_table(table_name: str, df: pd.DataFrame, conn):
 tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"])
 
 with tab_a:
-    http_path_input = st.text_input(
-        "Specify the HTTP Path to your Databricks SQL Warehouse:",
-        placeholder="/sql/1.0/warehouses/xxxxxx",
+    http_path_input = st.selectbox(
+        "Select a SQL warehouse:", [""] + list(warehouse_paths.keys())
     )
 
-    table_name = st.text_input(
-        "Specify a Catalog table name:", placeholder="catalog.schema.table"
+    catalog_name = st.selectbox(
+        "Select a catalog:", [""] + [catalog.name for catalog in catalogs]
     )
 
-    if http_path_input and table_name:
-        conn = get_connection(http_path_input)
-        original_df = read_table(table_name, conn)
-        edited_df = st.data_editor(original_df, num_rows="dynamic", hide_index=True)
+    if catalog_name and catalog_name != "":
+        schema_names = get_schema_names(catalog_name)
+        schema_name = st.selectbox("Select a schema:", [""] + schema_names)
+
+    if catalog_name and catalog_name != "" and schema_name and schema_name != "":
+        table_names = get_table_names(catalog_name, schema_name)
+        table_name = st.selectbox("Select a table:", [""] + table_names)
+
+    in_table_name = f"{catalog_name}.{schema_name}.{table_name}"
+
+    if (
+        http_path_input
+        and table_name
+        and catalog_name
+        and schema_name
+        and table_name != ""
+    ):
+        http_path = warehouse_paths[http_path_input]
+        conn = get_connection(http_path)
+        original_df = read_table(in_table_name, conn)
+        edited_df = st.data_editor(original_df, num_rows="dynamic", hide_index=True)
 
-        df_diff = pd.concat([original_df, edited_df]).drop_duplicates(keep=False)
-        if not df_diff.empty:
-            if st.button("Save changes"):
-                insert_overwrite_table(table_name, edited_df, conn)
-    else:
-        st.warning("Provide both the warehouse path and a table name to load data.")
+        df_diff = pd.concat([original_df, edited_df]).drop_duplicates(keep=False)
+        if not df_diff.empty:
+            if st.button("Save changes"):
+                insert_overwrite_table(in_table_name, edited_df, conn)
 
 
 with tab_b:
@@ -75,6 +109,7 @@ def insert_overwrite_table(table_name: str, df: pd.DataFrame, conn):
 from databricks import sql
 from databricks.sdk.core import Config
 
+
 cfg = Config() # Set the DATABRICKS_HOST environment variable when running locally
 
 
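One detail worth noting in the retained save logic: `pd.concat([original_df, edited_df]).drop_duplicates(keep=False)` keeps only rows that occur exactly once across the two frames, i.e. the before and after versions of every edited row, so an empty result means nothing changed. A tiny self-contained illustration with hypothetical data:

```python
import pandas as pd

original_df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
edited_df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "B", "c"]})

# Unchanged rows occur twice (once per frame) and are dropped;
# only the old and new versions of the edited row survive.
df_diff = pd.concat([original_df, edited_df]).drop_duplicates(keep=False)
print(df_diff)
#    id name
# 1   2    b
# 1   2    B
```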
streamlit/views/tables_read.py
Lines changed: 38 additions & 11 deletions

@@ -1,6 +1,7 @@
 import streamlit as st
 from databricks import sql
 from databricks.sdk.core import Config
+from databricks.sdk import WorkspaceClient
 
 st.header(body="Tables", divider=True)
 st.subheader("Read a table")
@@ -10,6 +11,14 @@
 
 cfg = Config()
 
+w = WorkspaceClient()
+
+warehouses = w.warehouses.list()
+
+warehouse_paths = {wh.name: wh.odbc_params.path for wh in warehouses}
+
+catalogs = w.catalogs.list()
+
 
 @st.cache_resource
 def get_connection(http_path):
@@ -27,21 +36,41 @@ def read_table(table_name, conn):
         return cursor.fetchall_arrow().to_pandas()
 
 
+def get_schema_names(catalog_name):
+    schemas = w.schemas.list(catalog_name=catalog_name)
+    return [schema.name for schema in schemas]
+
+
+def get_table_names(catalog_name, schema_name):
+    tables = w.tables.list(catalog_name=catalog_name, schema_name=schema_name)
+    return [table.name for table in tables]
+
+
 tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippet**", "**Requirements**"])
 
 with tab_a:
-    http_path_input = st.text_input(
-        "Enter your Databricks HTTP Path:", placeholder="/sql/1.0/warehouses/xxxxxx"
+    http_path_input = st.selectbox(
+        "Select a SQL warehouse:", [""] + list(warehouse_paths.keys())
     )
 
-    table_name = st.text_input(
-        "Specify a Unity Catalog table name:", placeholder="catalog.schema.table"
+    catalog_name = st.selectbox(
+        "Select a catalog:", [""] + [catalog.name for catalog in catalogs]
    )
 
-    if http_path_input and table_name:
-        conn = get_connection(http_path_input)
-        df = read_table(table_name, conn)
-        st.dataframe(df)
+    if catalog_name and catalog_name != "":
+        schema_names = get_schema_names(catalog_name)
+        schema_name = st.selectbox("Select a schema:", [""] + schema_names)
+
+    if catalog_name and catalog_name != "" and schema_name and schema_name != "":
+        table_names = get_table_names(catalog_name, schema_name)
+        table_name = st.selectbox("Select a table:", [""] + table_names)
+
+    if http_path_input and table_name and table_name != "":
+        http_path = warehouse_paths[http_path_input]
+        conn = get_connection(http_path)
+        df = read_table(f"{catalog_name}.{schema_name}.{table_name}", conn)
+        st.dataframe(df)
+
 
 with tab_b:
     st.code(
@@ -54,22 +83,20 @@ def read_table(table_name, conn):
 cfg = Config() # Set the DATABRICKS_HOST environment variable when running locally
 
 
-@st.cache_resource
+@st.cache_resource # connection is cached
 def get_connection(http_path):
     return sql.connect(
         server_hostname=cfg.host,
         http_path=http_path,
         credentials_provider=lambda: cfg.authenticate,
     )
 
-
 def read_table(table_name, conn):
     with conn.cursor() as cursor:
         query = f"SELECT * FROM {table_name}"
         cursor.execute(query)
         return cursor.fetchall_arrow().to_pandas()
 
-
 http_path_input = st.text_input(
     "Enter your Databricks HTTP Path:", placeholder="/sql/1.0/warehouses/xxxxxx"
 )
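
Both table views now share the same cascading selector flow: pick a SQL warehouse, then catalog, schema, and table, all resolved through the SDK rather than free-text paths. A condensed standalone sketch of that pattern, using only `WorkspaceClient` calls that appear in the diff (guards simplified; the empty-string sentinel mirrors the diff's `[""] +` idiom):

```python
import streamlit as st
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Warehouse display names map to the HTTP paths that sql.connect() needs.
warehouse_paths = {wh.name: wh.odbc_params.path for wh in w.warehouses.list()}
warehouse = st.selectbox("Select a SQL warehouse:", [""] + list(warehouse_paths))

catalog = st.selectbox(
    "Select a catalog:", [""] + [c.name for c in w.catalogs.list()]
)

# Each level only renders once its parent has been chosen.
schema = table = ""
if catalog:
    schema = st.selectbox(
        "Select a schema:",
        [""] + [s.name for s in w.schemas.list(catalog_name=catalog)],
    )
if catalog and schema:
    table = st.selectbox(
        "Select a table:",
        [""] + [t.name for t in w.tables.list(catalog_name=catalog, schema_name=schema)],
    )

if warehouse and catalog and schema and table:
    st.write(f"Would query {catalog}.{schema}.{table} via {warehouse_paths[warehouse]}")
```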
