
Commit 551e869

Small fixes

1 parent 7714eda · commit 551e869

3 files changed: +124 additions, -75 deletions

readme.md

Lines changed: 0 additions & 5 deletions

@@ -30,11 +30,6 @@ Not sure what to contribute? Here are some commonly requested samples:
 - Gradio implementation
 - Flask implementation
 
-## First Start
-
-- run requirements.txt
-- setup ~/.databrickscfg to config your databricks host and token
-
 ## Support
 
 These samples are experimental and meant for demonstration purposes only. They are provided as-is and without formal support by Databricks. Ensure your organization's security, compliance, and operational best practices are applied before deploying them to production.
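Note: the removed "First Start" steps relied on a `~/.databrickscfg` profile for the workspace host and token. A minimal sketch of how the Databricks SDK picks that profile up (placeholder values, not part of this commit):

```python
# Minimal sketch, not from this commit: authenticate the Databricks SDK
# from ~/.databrickscfg. Assumes a profile like (placeholder values):
#
#   [DEFAULT]
#   host  = https://<your-workspace-url>
#   token = <personal-access-token>
from databricks.sdk import WorkspaceClient

# With no arguments, WorkspaceClient resolves credentials via unified auth,
# which includes the DEFAULT profile in ~/.databrickscfg.
w = WorkspaceClient()

# A specific profile can also be selected explicitly:
# w = WorkspaceClient(profile="DEFAULT")

print(w.current_user.me().user_name)  # sanity check that authentication works
```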

streamlit/view_groups.py

Lines changed: 4 additions & 4 deletions

@@ -105,14 +105,14 @@
             },
         ],
     },
-    {
+    {
        "title": "Unity Catalog",
        "views": [
            {
-                "label": "Get Catalogs",
-                "help": "Get meta data.",
+                "label": "List catalogs and schemas",
+                "help": "Get metadata.",
                 "page": "views/unity_catalog_get.py",
-                "icon": ":material/lan:",
+                "icon": ":material/library_books:",
             },
         ],
     },
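The renamed entry above is navigation metadata only; the wiring that consumes it is not part of this commit. A hedged sketch of how such a view group could be fed into Streamlit's `st.Page`/`st.navigation` (assumed wiring, illustrative names):

```python
# Hypothetical sketch: turn a view-groups structure like the one in
# streamlit/view_groups.py into Streamlit navigation. The actual app wiring
# is not shown in this diff.
import streamlit as st

view_groups = [
    {
        "title": "Unity Catalog",
        "views": [
            {
                "label": "List catalogs and schemas",
                "help": "Get metadata.",
                "page": "views/unity_catalog_get.py",
                "icon": ":material/library_books:",
            },
        ],
    },
]

# One st.Page per view, grouped by section title for the sidebar.
pages = {
    group["title"]: [
        st.Page(view["page"], title=view["label"], icon=view["icon"])
        for view in group["views"]
    ]
    for group in view_groups
}

st.navigation(pages).run()  # requires Streamlit >= 1.36
```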
Lines changed: 120 additions & 66 deletions

@@ -1,66 +1,89 @@
 import pandas as pd
 import streamlit as st
 import datetime
-import matplotlib.pyplot as plt
 from databricks.sdk import WorkspaceClient
 
 
-workspace = WorkspaceClient()
+w = WorkspaceClient()
+
 
 def get_catalogs():
-    catalogs = workspace.catalogs.list()
-    # Parse metadata into a list of dictionaries
+    catalogs = w.catalogs.list()
+
     catalogs_data = []
     for catalog in catalogs:
-        catalogs_data.append({
-            "Catalog Name": catalog.name,
-            "Owner": catalog.owner,
-            "Comment": catalog.comment,
-            "Created At": datetime.datetime.fromtimestamp(catalog.created_at/1000),
-            "Updated At": datetime.datetime.fromtimestamp(catalog.updated_at/1000),
-        })
+        catalogs_data.append(
+            {
+                "Catalog name": catalog.name,
+                "Owner": catalog.owner,
+                "Comment": catalog.comment,
+                "Created at": datetime.datetime.fromtimestamp(
+                    catalog.created_at / 1000
+                ),
+                "Updated at": datetime.datetime.fromtimestamp(
+                    catalog.updated_at / 1000
+                ),
+            }
+        )
     return pd.DataFrame(catalogs_data)
 
-def get_schemas():
+
+def get_catalog_names():
+    catalogs = w.catalogs.list()
+    return [catalog.name for catalog in catalogs]
+
+
+def get_schemas_for_catalog(catalog_name):
     schema_data = []
-    for catalog in workspace.catalogs.list():
-        schemas = workspace.schemas.list(catalog_name=catalog.name)
-        for schema in schemas:
-            print(schema.catalog_name)
-            print(schema)
-            schema_data.append({
-                "Catalog Name": schema.catalog_name,
-                "Catalog Type": schema.catalog_type,
-                "Schema Name": schema.full_name,
+    schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
+    for schema in schemas:
+        schema_data.append(
+            {
+                "Catalog name": schema.catalog_name,
+                "Catalog type": schema.catalog_type,
+                "Schema name": schema.full_name,
                 "Owner": schema.owner,
                 "Comment": schema.comment,
-                "Created At": datetime.datetime.fromtimestamp(schema.created_at/1000),
-                "Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000),
-                "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
-                "Properites": schema.properties
-
-            })
+                "Created at": datetime.datetime.fromtimestamp(schema.created_at / 1000)
+                if schema.created_at
+                else None,
+                "Updated at": datetime.datetime.fromtimestamp(schema.updated_at / 1000)
+                if schema.updated_at
+                else None,
+                "Effective predictive optimization": schema.effective_predictive_optimization_flag,
+                "Properties": schema.properties,
+            }
+        )
     return pd.DataFrame(schema_data)
 
 
-
 st.header(body="Unity Catalog", divider=True)
 st.subheader("Get catalog and schema information")
-st.write(
-    "This receipt gets the meta data for the catalogs and the schemas."
-)
+st.write("This receipt lists metadata for catalogs and schemas in Unity Catalog.")
 
-tab_a, tab_b = st.tabs(["**Try it**", "**Code snippets**"])
+tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippets**", "**Requirements**"])
 
 with tab_a:
-    if st.button("Try It"):
-        st.write('### Databricks Catalogs')
-        st.dataframe(get_catalogs())
+    if st.button("Get catalogs"):
+        st.session_state["catalogs_df"] = get_catalogs()
+        st.session_state["catalog_names"] = get_catalog_names()
 
-        st.write('### Databricks Schema')
-
-        schemas = get_schemas()
-        st.dataframe(schemas)
+    if "catalogs_df" in st.session_state:
+        st.write("### Catalogs")
+        st.dataframe(st.session_state["catalogs_df"])
+
+        st.write("### Select a Catalog to View its Schemas")
+        selected_catalog = st.selectbox(
+            "Choose a catalog", options=st.session_state["catalog_names"]
+        )
+
+        if st.button("Get schemas for selected catalog"):
+            schemas_df = get_schemas_for_catalog(selected_catalog)
+            st.write(f"### Schemas for {selected_catalog}")
+            if not schemas_df.empty:
+                st.dataframe(schemas_df)
+            else:
+                st.info(f"No schemas found in the catalog '{selected_catalog}'")
 
 
 table = [
@@ -72,11 +95,11 @@ def get_schemas():
 ```python
 from databricks.sdk import WorkspaceClient
 
-
-workspace = WorkspaceClient()
+
+w = WorkspaceClient()
 
 def get_catalogs():
-    catalogs = workspace.catalogs.list()
+    catalogs = w.catalogs.list()
     # Parse metadata into a list of dictionaries
     catalogs_data = []
     for catalog in catalogs:
@@ -94,37 +117,42 @@ def get_catalogs():
 """,
     },
     {
-        "type": "Get Schemas",
-        "param": "get_schemas",
-        "description": "Get the schemas",
+        "type": "Get Schemas for Selected Catalog",
+        "param": "get_schemas_for_catalog",
+        "description": "Get the schemas for a specific catalog",
         "code": """
 ```python
 from databricks.sdk import WorkspaceClient
 
-workspace = WorkspaceClient()
+
+w = WorkspaceClient()
+
+def get_catalog_names():
+    catalogs = w.catalogs.list()
+    return [catalog.name for catalog in catalogs]
 
-def get_schemas():
+def get_schemas_for_catalog(catalog_name):
     schema_data = []
-    for catalog in workspace.catalogs.list():
-        schemas = workspace.schemas.list(catalog_name=catalog.name)
-        for schema in schemas:
-            print(schema.catalog_name)
-            print(schema)
-            schema_data.append({
-                "Catalog Name": schema.catalog_name,
-                "Catalog Type": schema.catalog_type,
-                "Schema Name": schema.full_name,
-                "Owner": schema.owner,
-                "Comment": schema.comment,
-                "Created At": datetime.datetime.fromtimestamp(schema.created_at/1000),
-                "Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000),
-                "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
-                "Properites": schema.properties
-
-            })
+    schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
+    for schema in schemas:
+        schema_data.append({
+            "Catalog Name": schema.catalog_name,
+            "Catalog Type": schema.catalog_type,
+            "Schema Name": schema.full_name,
+            "Owner": schema.owner,
+            "Comment": schema.comment,
+            "Created At": datetime.datetime.fromtimestamp(schema.created_at/1000) if schema.created_at else None,
+            "Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000) if schema.updated_at else None,
+            "Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
+            "Properties": schema.properties
+        })
    return pd.DataFrame(schema_data)
-schemas = get_schemas()
-st.dataframe(schemas)
+
+# In the UI:
+selected_catalog = st.selectbox("Choose a catalog", options=get_catalog_names())
+if st.button("Get Schemas"):
+    schemas = get_schemas_for_catalog(selected_catalog)
+    st.dataframe(schemas)
 ```
 """,
    },
@@ -135,3 +163,29 @@ def get_schemas():
     with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)):
         st.markdown(f"**Description**: {row['description']}")
         st.markdown(row["code"])
+
+with tab_c:
+    st.info("""
+        To list all catalogs, you need the [metastore admin](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/admin-privileges#metastore-admins) role.
+        Otherwise, only catalogs for which you have the `USE_CATALOG` permission will be retrieved.
+    """)
+
+    col1, col2, col3 = st.columns(3)
+
+    with col1:
+        st.markdown("""
+            **Permissions (app service principal)**
+            * `USE_CATALOG` on the Unity Catalog catalogs to list
+            * `USE_SCHEMA` on the schemas you want to view
+        """)
+    with col2:
+        st.markdown("""
+            **Databricks resources**
+            * Unity Catalog enabled workspace
+        """)
+    with col3:
+        st.markdown("""
+            **Dependencies**
+            * [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
+            * [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
+        """)
