Skip to content

Commit c4c1ac8

Browse files
committed
Resolve conflict
1 parent a0f4814 commit c4c1ac8

File tree

1 file changed

+122
-64
lines changed

1 file changed

+122
-64
lines changed
Lines changed: 122 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,66 +1,89 @@
11
import pandas as pd
22
import streamlit as st
33
import datetime
4-
import matplotlib.pyplot as plt
54
from databricks.sdk import WorkspaceClient
65

76

8-
workspace = WorkspaceClient()
7+
w = WorkspaceClient()
8+
99

1010
def get_catalogs():
11-
catalogs = workspace.catalogs.list()
12-
# Parse metadata into a list of dictionaries
11+
catalogs = w.catalogs.list()
12+
1313
catalogs_data = []
1414
for catalog in catalogs:
15-
catalogs_data.append({
16-
"Catalog Name": catalog.name,
17-
"Owner": catalog.owner,
18-
"Comment": catalog.comment,
19-
"Created At": datetime.datetime.fromtimestamp(catalog.created_at/1000),
20-
"Updated At": datetime.datetime.fromtimestamp(catalog.updated_at/1000),
21-
})
15+
catalogs_data.append(
16+
{
17+
"Catalog name": catalog.name,
18+
"Owner": catalog.owner,
19+
"Comment": catalog.comment,
20+
"Created at": datetime.datetime.fromtimestamp(
21+
catalog.created_at / 1000
22+
),
23+
"Updated at": datetime.datetime.fromtimestamp(
24+
catalog.updated_at / 1000
25+
),
26+
}
27+
)
2228
return pd.DataFrame(catalogs_data)
2329

24-
def get_schemas():
30+
31+
def get_catalog_names():
32+
catalogs = w.catalogs.list()
33+
return [catalog.name for catalog in catalogs]
34+
35+
36+
def get_schemas_for_catalog(catalog_name):
2537
schema_data = []
26-
for catalog in workspace.catalogs.list():
27-
schemas = workspace.schemas.list(catalog_name=catalog.name)
28-
for schema in schemas:
29-
print(schema.catalog_name)
30-
print(schema)
31-
schema_data.append({
32-
"Catalog Name": schema.catalog_name,
33-
"Catalog Type": schema.catalog_type,
34-
"Schema Name": schema.full_name,
38+
schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
39+
for schema in schemas:
40+
schema_data.append(
41+
{
42+
"Catalog name": schema.catalog_name,
43+
"Catalog type": schema.catalog_type,
44+
"Schema name": schema.full_name,
3545
"Owner": schema.owner,
3646
"Comment": schema.comment,
37-
"Created At": datetime.datetime.fromtimestamp(schema.created_at/1000),
38-
"Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000),
39-
"Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
40-
"Properites": schema.properties
41-
42-
})
47+
"Created at": datetime.datetime.fromtimestamp(schema.created_at / 1000)
48+
if schema.created_at
49+
else None,
50+
"Updated at": datetime.datetime.fromtimestamp(schema.updated_at / 1000)
51+
if schema.updated_at
52+
else None,
53+
"Effective predictive optimization": schema.effective_predictive_optimization_flag,
54+
"Properties": schema.properties,
55+
}
56+
)
4357
return pd.DataFrame(schema_data)
4458

4559

46-
4760
st.header(body="Unity Catalog", divider=True)
4861
st.subheader("Get catalog and schema information")
49-
st.write(
50-
"This receipt gets the meta data for the catalogs and the schemas."
51-
)
62+
st.write("This receipt lists metadata for catalogs and schemas in Unity Catalog.")
5263

53-
tab_a, tab_b = st.tabs(["**Try it**", "**Code snippets**"])
64+
tab_a, tab_b, tab_c = st.tabs(["**Try it**", "**Code snippets**", "**Requirements**"])
5465

5566
with tab_a:
56-
if st.button("Try It"):
57-
st.write('### Databricks Catalogs')
58-
st.dataframe(get_catalogs())
67+
if st.button("Get catalogs"):
68+
st.session_state["catalogs_df"] = get_catalogs()
69+
st.session_state["catalog_names"] = get_catalog_names()
5970

60-
st.write('### Databricks Schema')
71+
if "catalogs_df" in st.session_state:
72+
st.write("### Catalogs")
73+
st.dataframe(st.session_state["catalogs_df"])
6174

62-
schemas = get_schemas()
63-
st.dataframe(schemas)
75+
st.write("### Select a Catalog to View its Schemas")
76+
selected_catalog = st.selectbox(
77+
"Choose a catalog", options=st.session_state["catalog_names"]
78+
)
79+
80+
if st.button("Get schemas for selected catalog"):
81+
schemas_df = get_schemas_for_catalog(selected_catalog)
82+
st.write(f"### Schemas for {selected_catalog}")
83+
if not schemas_df.empty:
84+
st.dataframe(schemas_df)
85+
else:
86+
st.info(f"No schemas found in the catalog '{selected_catalog}'")
6487

6588

6689
table = [
@@ -71,9 +94,12 @@ def get_schemas():
7194
"code": """
7295
```python
7396
from databricks.sdk import WorkspaceClient
74-
workspace = WorkspaceClient()
97+
98+
99+
w = WorkspaceClient()
100+
75101
def get_catalogs():
76-
catalogs = workspace.catalogs.list()
102+
catalogs = w.catalogs.list()
77103
# Parse metadata into a list of dictionaries
78104
catalogs_data = []
79105
for catalog in catalogs:
@@ -91,36 +117,42 @@ def get_catalogs():
91117
""",
92118
},
93119
{
94-
"type": "Get Schemas",
95-
"param": "get_schemas",
96-
"description": "Get the schemas",
120+
"type": "Get Schemas for Selected Catalog",
121+
"param": "get_schemas_for_catalog",
122+
"description": "Get the schemas for a specific catalog",
97123
"code": """
98124
```python
99125
from databricks.sdk import WorkspaceClient
100126
101-
workspace = WorkspaceClient()
127+
128+
w = WorkspaceClient()
129+
130+
def get_catalog_names():
131+
catalogs = w.catalogs.list()
132+
return [catalog.name for catalog in catalogs]
102133
103-
def get_schemas():
134+
def get_schemas_for_catalog(catalog_name):
104135
schema_data = []
105-
for catalog in workspace.catalogs.list():
106-
schemas = workspace.schemas.list(catalog_name=catalog.name)
107-
for schema in schemas:
108-
print(schema.catalog_name)
109-
print(schema)
110-
schema_data.append({
111-
"Catalog Name": schema.catalog_name,
112-
"Catalog Type": schema.catalog_type,
113-
"Schema Name": schema.full_name,
114-
"Owner": schema.owner,
115-
"Comment": schema.comment,
116-
"Created At": datetime.datetime.fromtimestamp(schema.created_at/1000),
117-
"Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000),
118-
"Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
119-
"Properites": schema.properties
120-
})
136+
schemas = w.schemas.list(catalog_name=catalog_name, max_results=10)
137+
for schema in schemas:
138+
schema_data.append({
139+
"Catalog Name": schema.catalog_name,
140+
"Catalog Type": schema.catalog_type,
141+
"Schema Name": schema.full_name,
142+
"Owner": schema.owner,
143+
"Comment": schema.comment,
144+
"Created At": datetime.datetime.fromtimestamp(schema.created_at/1000) if schema.created_at else None,
145+
"Updated At": datetime.datetime.fromtimestamp(schema.updated_at/1000) if schema.updated_at else None,
146+
"Effective Predictive Optimization": schema.effective_predictive_optimization_flag,
147+
"Properties": schema.properties
148+
})
121149
return pd.DataFrame(schema_data)
122-
schemas = get_schemas()
123-
st.dataframe(schemas)
150+
151+
# In the UI:
152+
selected_catalog = st.selectbox("Choose a catalog", options=get_catalog_names())
153+
if st.button("Get Schemas"):
154+
schemas = get_schemas_for_catalog(selected_catalog)
155+
st.dataframe(schemas)
124156
```
125157
""",
126158
},
@@ -130,4 +162,30 @@ def get_schemas():
130162
for i, row in enumerate(table):
131163
with st.expander(f"**{row['type']} ({row['param']})**", expanded=(i == 0)):
132164
st.markdown(f"**Description**: {row['description']}")
133-
st.markdown(row["code"])
165+
st.markdown(row["code"])
166+
167+
with tab_c:
168+
st.info("""
169+
To list all catalogs, you need the [metastore admin](https://docs.databricks.com/aws/en/data-governance/unity-catalog/manage-privileges/admin-privileges#metastore-admins) role.
170+
Otherwise, only catalogs for which you have the `USE_CATALOG` permission will be retrieved.
171+
""")
172+
173+
col1, col2, col3 = st.columns(3)
174+
175+
with col1:
176+
st.markdown("""
177+
**Permissions (app service principal)**
178+
* `USE_CATALOG` on the Unity Catalog catalogs to list
179+
* `USE_SCHEMA` on the schemas you want to view
180+
""")
181+
with col2:
182+
st.markdown("""
183+
**Databricks resources**
184+
* Unity Catalog enabled workspace
185+
""")
186+
with col3:
187+
st.markdown("""
188+
**Dependencies**
189+
* [Databricks SDK](https://pypi.org/project/databricks-sdk/) - `databricks-sdk`
190+
* [Streamlit](https://pypi.org/project/streamlit/) - `streamlit`
191+
""")

0 commit comments

Comments (0)