Skip to content

Commit 0906b06

Browse files
committed
Update new creator
1 parent 87189c2 commit 0906b06

File tree

1 file changed

+71
-18
lines changed

1 file changed

+71
-18
lines changed

text_2_sql/text_2_sql_core/src/text_2_sql_core/data_dictionary/data_dictionary_creator.py

Lines changed: 71 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ class ColumnItem(BaseModel):
104104

105105
model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)
106106

107+
def value_store_entry(
    self, entity, distinct_value, excluded_fields_for_database_engine
):
    """A method to build the value store entry for one distinct column value.

    Args:
        entity: The owning entity; its ``value_store_entry`` method supplies
            the base entry.
        distinct_value: The distinct value stored under the "Value" key.
        excluded_fields_for_database_engine: Fields forwarded unchanged to
            ``entity.value_store_entry``.

    Returns:
        The entity's entry with "Value" set to ``distinct_value`` and
        "Synonyms" set to a new empty list.
    """
    entry = entity.value_store_entry(excluded_fields_for_database_engine)
    entry.update({"Value": distinct_value, "Synonyms": []})
    return entry
115+
107116
@classmethod
108117
def from_sql_row(cls, row, columns):
109118
"""A method to create a ColumnItem from a SQL row."""
@@ -141,6 +150,26 @@ class EntityItem(BaseModel):
141150

142151
model_config = ConfigDict(populate_by_name=True)
143152

153+
@property
def id(self):
    """A property giving the dot-separated identifier of the entity.

    Joins warehouse, catalog, database and entity, in that order, skipping
    any part that is None.
    """
    parts = (self.warehouse, self.catalog, self.database, self.entity)
    return ".".join(part for part in parts if part is not None)
159+
160+
def value_store_entry(self, excluded_fields_for_database_engine):
    """A method to dump the entity fields shared by every value store entry.

    Args:
        excluded_fields_for_database_engine: Fields to leave out of the dump,
            in addition to the fixed set listed below.

    Returns:
        The result of ``model_dump`` called with aliases enabled, None values
        dropped, and the combined exclusion list.
    """
    always_excluded = [
        "Definition",
        "Name",
        "EntityName",
        "EntityRelationships",
        "CompleteEntityRelationshipsGraph",
        "Columns",
    ]
    fields_to_skip = excluded_fields_for_database_engine + always_excluded
    return self.model_dump(
        by_alias=True,
        exclude_none=True,
        exclude=fields_to_skip,
    )
172+
144173
@classmethod
145174
def from_sql_row(cls, row, columns):
146175
"""A method to create an EntityItem from a SQL row."""
@@ -407,6 +436,25 @@ async def extract_entities_with_definitions(self) -> list[EntityItem]:
407436

408437
return all_entities
409438

439+
async def write_columns_to_file(self, entity: EntityItem, column: ColumnItem):
440+
logging.info(f"Saving column values for {column.name}")
441+
442+
key = f"{entity.id}.{column.name}"
443+
with open(
444+
f"{self.output_directory}/column_value_store/{key}.jsonl",
445+
"w",
446+
encoding="utf-8",
447+
) as f:
448+
for distinct_value in column.distinct_values:
449+
json.dump(
450+
column.value_store_entry(
451+
entity, distinct_value, self.excluded_fields_for_database_engine
452+
),
453+
f,
454+
indent=4,
455+
default=str,
456+
)
457+
410458
async def extract_column_distinct_values(
411459
self, entity: EntityItem, column: ColumnItem
412460
):
@@ -620,6 +668,23 @@ async def generate_entity_definition(self, entity: EntityItem):
620668
logging.info(f"definition for {entity.entity}: {definition}")
621669
entity.definition = definition
622670

671+
async def write_entity_to_file(self, entity):
    """A method to write a single entity's data dictionary to a JSON file.

    The file is written to ``{output_directory}/schema_store/{entity.id}.json``.

    Args:
        entity: The entity to save. It is dumped with ``model_dump`` using
            aliases and excluding ``self.excluded_fields_for_database_engine``.
    """
    import os  # local import keeps this block self-contained

    logging.info(f"Saving data dictionary for {entity.entity}")

    store_directory = f"{self.output_directory}/schema_store"

    # open() does not create missing parent directories.
    os.makedirs(store_directory, exist_ok=True)

    with open(
        f"{store_directory}/{entity.id}.json",
        "w",
        encoding="utf-8",
    ) as f:
        json.dump(
            entity.model_dump(
                by_alias=True,
                exclude=self.excluded_fields_for_database_engine,
            ),
            f,
            indent=4,
            default=str,
        )
687+
623688
async def build_entity_entry(self, entity: EntityItem) -> EntityItem:
624689
"""A method to build an entity entry.
625690
@@ -647,6 +712,9 @@ async def build_entity_entry(self, entity: EntityItem) -> EntityItem:
647712
self.get_entity_relationships_from_graph(entity.entity)
648713
)
649714

715+
if self.single_file is False:
716+
await self.write_entity_to_file(entity)
717+
650718
return entity
651719

652720
@property
@@ -685,7 +753,9 @@ async def create_data_dictionary(self):
685753
if self.single_file:
686754
logging.info("Saving data dictionary to entities.json")
687755
with open(
688-
f"{self.output_directory}/entities.json", "w", encoding="utf-8"
756+
f"{self.output_directory}/schema_store/entities.json",
757+
"w",
758+
encoding="utf-8",
689759
) as f:
690760
data_dictionary_dump = [
691761
entity.model_dump(
@@ -699,20 +769,3 @@ async def create_data_dictionary(self):
699769
indent=4,
700770
default=str,
701771
)
702-
else:
703-
for entity in data_dictionary:
704-
logging.info(f"Saving data dictionary for {entity.entity}")
705-
with open(
706-
f"{self.output_directory}/{entity.entity}.json",
707-
"w",
708-
encoding="utf-8",
709-
) as f:
710-
json.dump(
711-
entity.model_dump(
712-
by_alias=True,
713-
exclude=self.excluded_fields_for_database_engine,
714-
),
715-
f,
716-
indent=4,
717-
default=str,
718-
)

0 commit comments

Comments
 (0)