Skip to content

Commit b95c5f2

Browse files
committed
chore: Clean up DataHub metadata fetcher and remove debug print
1 parent d1607ce commit b95c5f2

File tree

2 files changed

+13
-28
lines changed

2 files changed

+13
-28
lines changed

data_utils/datahub_source.py

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
from datahub.metadata.schema_classes import DatasetPropertiesClass, SchemaMetadataClass
44
from datahub.emitter.rest_emitter import DatahubRestEmitter
55

6+
67
class DatahubMetadataFetcher:
78
def __init__(self, gms_server="http://localhost:8080", extra_headers=None):
    """Create the REST emitter and the graph client derived from it.

    Args:
        gms_server: URL passed to ``DatahubRestEmitter`` as ``gms_server``.
        extra_headers: Optional dict passed to ``DatahubRestEmitter`` as
            ``extra_headers``. ``None`` (the default) means "no extra
            headers" and is replaced by a fresh empty dict.
    """
    # A ``{}`` default would be a single dict object shared by every call;
    # build a new one per instance instead.
    if extra_headers is None:
        extra_headers = {}
    self.emitter = DatahubRestEmitter(
        gms_server=gms_server, extra_headers=extra_headers
    )
    # All later metadata lookups go through this graph client.
    self.datahub_graph = self.emitter.to_graph()
1013

1114
def get_urns(self):
@@ -15,8 +18,7 @@ def get_urns(self):
1518
def get_table_name(self, urn):
1619
# URN에 대한 테이블 이름 가져오기
1720
dataset_properties = self.datahub_graph.get_aspect(
18-
urn,
19-
aspect_type=DatasetPropertiesClass
21+
urn, aspect_type=DatasetPropertiesClass
2022
)
2123
if dataset_properties:
2224
return dataset_properties.get("name", None)
@@ -25,8 +27,7 @@ def get_table_name(self, urn):
2527
def get_table_description(self, urn):
2628
# URN에 대한 테이블 설명 가져오기
2729
dataset_properties = self.datahub_graph.get_aspect(
28-
urn,
29-
aspect_type=DatasetPropertiesClass
30+
urn, aspect_type=DatasetPropertiesClass
3031
)
3132
if dataset_properties:
3233
return dataset_properties.get("description", None)
@@ -35,30 +36,15 @@ def get_table_description(self, urn):
3536
def get_column_names_and_descriptions(self, urn):
    """Return the column names and descriptions for the given URN.

    Fetches the ``SchemaMetadataClass`` aspect for ``urn`` from
    ``self.datahub_graph``.

    Args:
        urn: URN whose schema metadata should be looked up.

    Returns:
        A list with one dict per entry in the aspect's ``fields``, each with
        the keys ``"column_name"`` (the field's ``fieldPath``) and
        ``"column_description"`` (the field's ``description``). The list is
        empty when no schema metadata aspect is returned.
    """
    schema_metadata = self.datahub_graph.get_aspect(
        urn, aspect_type=SchemaMetadataClass
    )
    if not schema_metadata:
        return []
    return [
        {
            "column_name": field.fieldPath,
            "column_description": field.description,
        }
        for field in schema_metadata.fields
    ]
49-
50-
# # 사용 예시
51-
# fetcher = DatahubMetadataFetcher()
52-
# urns = fetcher.get_urns()
53-
54-
# for urn in urns:
55-
# table_name = fetcher.get_table_name(urn)
56-
# table_description = fetcher.get_table_description(urn)
57-
# columns = fetcher.get_column_names_and_descriptions(urn)
58-
59-
# print(f"Table Name: {table_name}")
60-
# print(f"Table Description: {table_description}")
61-
# for column in columns:
62-
# print(f"Column Name: {column['column_name']}")
63-
# print(f"Column Description: {column['column_description']}")
64-
# print("-" * 60)

interface/streamlit_app.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
def summarize_total_tokens(data):
    """Sum the ``total_tokens`` counts reported by the items in ``data``.

    Args:
        data: Iterable of objects that may carry a ``usage_metadata``
            attribute (presumably chat/LLM message objects; confirm against
            the caller). The attribute is expected to be a dict-like object
            or ``None``.

    Returns:
        The sum of ``usage_metadata["total_tokens"]`` over all items. Items
        without the attribute, with it set to ``None``, or without a
        ``"total_tokens"`` key contribute 0.
    """
    total_tokens = 0
    for item in data:
        # ``or {}`` covers both a missing attribute and one explicitly set
        # to None; a ``{}`` getattr default alone would let None through
        # and fail on ``.get``.
        token_usage = getattr(item, "usage_metadata", None) or {}
        total_tokens += token_usage.get("total_tokens", 0)
    return total_tokens

0 commit comments

Comments
 (0)