import pandas as pd
import streamlit as st
import datetime
-import matplotlib.pyplot as plt
from databricks.sdk import WorkspaceClient


-workspace = WorkspaceClient()
+w = WorkspaceClient()
+

def get_catalogs():
-    catalogs = workspace.catalogs.list()
-    # Parse metadata into a list of dictionaries
+    catalogs = w.catalogs.list()
+
    catalogs_data = []
    for catalog in catalogs:
-        catalogs_data.append({
-            "Catalog Name": catalog.name,
-            "Owner": catalog.owner,
-            "Comment": catalog.comment,
-            "Created At": datetime.datetime.fromtimestamp(catalog.created_at / 1000),
-            "Updated At": datetime.datetime.fromtimestamp(catalog.updated_at / 1000),
-        })
+        catalogs_data.append(
+            {
+                "Catalog name": catalog.name,
+                "Owner": catalog.owner,
+                "Comment": catalog.comment,
+                "Created at": datetime.datetime.fromtimestamp(
+                    catalog.created_at / 1000
+                ),
+                "Updated at": datetime.datetime.fromtimestamp(
+                    catalog.updated_at / 1000
+                ),
+            }
+        )
    return pd.DataFrame(catalogs_data)

-def get_schemas():
+
+def get_catalog_names():
+    catalogs = w.catalogs.list()
+    return [catalog.name for catalog in catalogs]
+
+
+def get_schemas_for_catalog(catalog_name):
    schema_data = []
-    for catalog in workspace.catalogs.list():
-        schemas = workspace.schemas.list(catalog_name=catalog.name)
-        for schema in schemas:
-            print(schema.catalog_name)
-            print(schema)
-            schema_data.append({
-                "Catalog Name": schema.catalog_name,
-                "Catalog Type": schema.catalog_type,
-                "Schema Name": schema.full_name,
+    schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
+    for schema in schemas:
+        schema_data.append(
+            {
+                "Catalog name": schema.catalog_name,
+                "Catalog type": schema.catalog_type,
+                "Schema name": schema.full_name,
                "Owner": schema.owner,
                "Comment": schema.comment,
-                "Created At": datetime.datetime.fromtimestamp(schema.created_at / 1000),
-                "Updated At": datetime.datetime.fromtimestamp(schema.updated_at / 1000),
-                "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
-                "Properites": schema.properties
-
-            })
+                "Created at": datetime.datetime.fromtimestamp(schema.created_at / 1000)
+                if schema.created_at
+                else None,
+                "Updated at": datetime.datetime.fromtimestamp(schema.updated_at / 1000)
+                if schema.updated_at
+                else None,
+                "Effective predictive optimization": schema.effective_predictive_optimization_flag,
+                "Properties": schema.properties,
+            }
+        )
    return pd.DataFrame(schema_data)


-
st.header(body="Unity Catalog", divider=True)
st.subheader("Get catalog and schema information")
-st.write(
-    "This receipt gets the meta data for the catalogs and the schemas."
-)
+st.write("This recipe lists metadata for catalogs and schemas in Unity Catalog.")

-tab_a, tab_b = st.tabs(["**Try it**", "**Code snippets**"])
+tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippets**", "**Requirements**"])

with tab_a:
-    if st.button("Try It"):
-        st.write('### Databricks Catalogs')
-        st.dataframe(get_catalogs())
+    if st.button("Get catalogs"):
+        st.session_state["catalogs_df"] = get_catalogs()
+        st.session_state["catalog_names"] = get_catalog_names()

-        st.write('### Databricks Schema')
+    if "catalogs_df" in st.session_state:
+        st.write("### Catalogs")
+        st.dataframe(st.session_state["catalogs_df"])

-        schemas = get_schemas()
-        st.dataframe(schemas)
+        st.write("### Select a Catalog to View its Schemas")
+        selected_catalog = st.selectbox(
+            "Choose a catalog", options=st.session_state["catalog_names"]
+        )
+
+        if st.button("Get schemas for selected catalog"):
+            schemas_df = get_schemas_for_catalog(selected_catalog)
+            st.write(f"### Schemas for {selected_catalog}")
+            if not schemas_df.empty:
+                st.dataframe(schemas_df)
+            else:
+                st.info(f"No schemas found in the catalog '{selected_catalog}'")


table = [
@@ -71,9 +94,12 @@ def get_schemas():
7194 "code" : """
7295 ```python
7396 from databricks.sdk import WorkspaceClient
74- workspace = WorkspaceClient()
97+
98+
99+ w = WorkspaceClient()
100+
75101 def get_catalogs():
76- catalogs = workspace .catalogs.list()
102+ catalogs = w .catalogs.list()
77103 # Parse metadata into a list of dictionaries
78104 catalogs_data = []
79105 for catalog in catalogs:
@@ -91,36 +117,42 @@ def get_catalogs():
91117 """ ,
92118 },
93119 {
94- "type" : "Get Schemas" ,
95- "param" : "get_schemas " ,
96- "description" : "Get the schemas" ,
120+ "type" : "Get Schemas for Selected Catalog " ,
121+ "param" : "get_schemas_for_catalog " ,
122+ "description" : "Get the schemas for a specific catalog " ,
97123 "code" : """
98124 ```python
99125 from databricks.sdk import WorkspaceClient
100126
101- workspace = WorkspaceClient()
127+
128+ w = WorkspaceClient()
129+
130+ def get_catalog_names():
131+ catalogs = w.catalogs.list()
132+ return [catalog.name for catalog in catalogs]
102133
103- def get_schemas( ):
134+ def get_schemas_for_catalog(catalog_name ):
104135 schema_data = []
105- for catalog in workspace.catalogs.list():
106- schemas = workspace.schemas.list(catalog_name=catalog.name)
107- for schema in schemas:
108- print(schema.catalog_name)
109- print(schema)
110- schema_data.append({
111- "Catalog Name": schema.catalog_name,
112- "Catalog Type": schema.catalog_type,
113- "Schema Name": schema.full_name,
114- "Owner": schema.owner,
115- "Comment": schema.comment,
116- "Created At": datetime.datetime.fromtimestamp(schema.created_at/1000),
117- "Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000),
118- "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
119- "Properites": schema.properties
120- })
136+ schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
137+ for schema in schemas:
138+ schema_data.append({
139+ "Catalog Name": schema.catalog_name,
140+ "Catalog Type": schema.catalog_type,
141+ "Schema Name": schema.full_name,
142+ "Owner": schema.owner,
143+ "Comment": schema.comment,
144+ "Created At": datetime.datetime.fromtimestamp(schema.created_at/1000) if schema.created_at else None,
145+ "Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000) if schema.updated_at else None,
146+ "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
147+ "Properties": schema.properties
148+ })
121149 return pd.DataFrame(schema_data)
122- schemas = get_schemas()
123- st.dataframe(schemas)
150+
151+ # In the UI:
152+ selected_catalog = st.selectbox("Choose a catalog", options=get_catalog_names())
153+ if st.button("Get Schemas"):
154+ schemas = get_schemas_for_catalog(selected_catalog)
155+ st.dataframe(schemas)
124156 ```
125157 """ ,
126158 },
@@ -130,4 +162,30 @@ def get_schemas():
for i, row in enumerate(table):
    with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)):
        st.markdown(f"**Description**: {row['description']}")
-        st.markdown(row["code"])
+        st.markdown(row["code"])
+
+with tab_c:
+    st.info("""
+        To list all catalogs, you need the [metastore admin](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/admin-privileges#metastore-admins) role.
+        Otherwise, only catalogs for which you have the `USE_CATALOG` permission will be retrieved.
+    """)
+
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        st.markdown("""
+            **Permissions (app service principal)**
+            * `USE_CATALOG` on the Unity Catalog catalogs to list
+            * `USE_SCHEMA` on the schemas you want to view
+        """)
+    with col2:
+        st.markdown("""
+            **Databricks resources**
+            * Unity Catalog enabled workspace
+        """)
+    with col3:
+        st.markdown("""
+            **Dependencies**
+            * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
+            * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+        """)
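The new Requirements tab covers the permissions the app's service principal needs. When trying the recipe locally instead, `WorkspaceClient()` resolves credentials through Databricks unified authentication (environment variables or a `~/.databrickscfg` profile). A quick sanity check, assuming a hypothetical `DEV` profile configured in `~/.databrickscfg`:

```python
from databricks.sdk import WorkspaceClient

# Assumes a [DEV] profile with host and token set in ~/.databrickscfg (hypothetical name).
w = WorkspaceClient(profile="DEV")
print(w.current_user.me().user_name)  # confirms authentication before running the app
```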