@@ -104,6 +104,15 @@ class ColumnItem(BaseModel):
104104
105105 model_config = ConfigDict (populate_by_name = True , arbitrary_types_allowed = True )
106106
def value_store_entry(
    self, entity, distinct_value, excluded_fields_for_database_engine
):
    """A method to build the value store entry for one distinct column value.

    Args:
        entity: The owning entity. Its ``value_store_entry`` method is called
            with ``excluded_fields_for_database_engine`` to obtain the base
            entry.
        distinct_value: The value stored under the ``"Value"`` key.
        excluded_fields_for_database_engine: Passed through unchanged to
            ``entity.value_store_entry``.

    Returns:
        The mapping returned by ``entity.value_store_entry``, updated in place
        with ``"Value"`` set to ``distinct_value`` and ``"Synonyms"`` set to a
        new empty list.
    """
    record = entity.value_store_entry(excluded_fields_for_database_engine)

    # A fresh list per call, so entries never share one "Synonyms" list.
    record.update({"Value": distinct_value, "Synonyms": []})
    return record
115+
107116 @classmethod
108117 def from_sql_row (cls , row , columns ):
109118 """A method to create a ColumnItem from a SQL row."""
@@ -141,6 +150,26 @@ class EntityItem(BaseModel):
141150
142151 model_config = ConfigDict (populate_by_name = True )
143152
@property
def id(self):
    """The dotted identifier of this entity.

    Joins ``warehouse``, ``catalog``, ``database`` and ``entity`` (in that
    order) with ``"."``, leaving out any part that is ``None``. Returns an
    empty string when every part is ``None``.
    """
    parts = (self.warehouse, self.catalog, self.database, self.entity)
    return ".".join(part for part in parts if part is not None)
159+
def value_store_entry(self, excluded_fields_for_database_engine):
    """A method to dump the entity-level part of a column value store entry.

    Args:
        excluded_fields_for_database_engine: Iterable of names to exclude
            from the dump, in addition to the fixed names listed below. Any
            iterable (list, tuple, set) is accepted.

    Returns:
        The result of ``self.model_dump`` called with ``by_alias=True``,
        ``exclude_none=True`` and the combined exclusion set.
    """
    # Build a set rather than concatenating lists: concatenation fails for
    # tuples/sets, and pydantic documents `exclude` as a set or mapping.
    # NOTE(review): these names look like serialization aliases; pydantic
    # matches `exclude` against field names — confirm they are the field names.
    excluded_fields = set(excluded_fields_for_database_engine) | {
        "Definition",
        "Name",
        "EntityName",
        "EntityRelationships",
        "CompleteEntityRelationshipsGraph",
        "Columns",
    }
    return self.model_dump(
        by_alias=True, exclude_none=True, exclude=excluded_fields
    )
172+
144173 @classmethod
145174 def from_sql_row (cls , row , columns ):
146175 """A method to create an EntityItem from a SQL row."""
@@ -407,6 +436,25 @@ async def extract_entities_with_definitions(self) -> list[EntityItem]:
407436
408437 return all_entities
409438
async def write_columns_to_file(self, entity: EntityItem, column: ColumnItem):
    """A method to write a column's distinct values to a JSON Lines file.

    The file is ``<output_directory>/column_value_store/<entity.id>.<column.name>.jsonl``
    and holds one JSON object per line, one line per item in
    ``column.distinct_values``. An existing file is overwritten.

    Args:
        entity: The entity that owns the column; supplies ``id`` for the file
            name and is passed to ``column.value_store_entry``.
        column: The column whose ``distinct_values`` are written.
    """
    # Imported locally so this method does not rely on the module's import list.
    import os

    logging.info(f"Saving column values for {column.name}")

    key = f"{entity.id}.{column.name}"
    store_directory = f"{self.output_directory}/column_value_store"

    # open(..., "w") does not create missing parent directories.
    os.makedirs(store_directory, exist_ok=True)

    with open(f"{store_directory}/{key}.jsonl", "w", encoding="utf-8") as f:
        for distinct_value in column.distinct_values:
            entry = column.value_store_entry(
                entity, distinct_value, self.excluded_fields_for_database_engine
            )
            # JSON Lines: one compact object per line, newline-terminated.
            # Indented output would span lines and break line-based readers.
            f.write(json.dumps(entry, default=str))
            f.write("\n")
457+
410458 async def extract_column_distinct_values (
411459 self , entity : EntityItem , column : ColumnItem
412460 ):
@@ -620,6 +668,23 @@ async def generate_entity_definition(self, entity: EntityItem):
620668 logging .info (f"definition for { entity .entity } : { definition } " )
621669 entity .definition = definition
622670
async def write_entity_to_file(self, entity):
    """A method to write one entity's data dictionary entry to its own JSON file.

    The file is ``<output_directory>/schema_store/<entity.id>.json`` and holds
    ``entity.model_dump(by_alias=True, exclude=self.excluded_fields_for_database_engine)``
    serialized with a 4-space indent. An existing file is overwritten.

    Args:
        entity: The entity to save; must expose ``entity``, ``id`` and
            ``model_dump``.
    """
    # Imported locally so this method does not rely on the module's import list.
    import os

    logging.info(f"Saving data dictionary for {entity.entity}")

    schema_directory = f"{self.output_directory}/schema_store"

    # open(..., "w") does not create missing parent directories.
    os.makedirs(schema_directory, exist_ok=True)

    with open(
        f"{schema_directory}/{entity.id}.json",
        "w",
        encoding="utf-8",
    ) as f:
        json.dump(
            entity.model_dump(
                by_alias=True,
                exclude=self.excluded_fields_for_database_engine,
            ),
            f,
            indent=4,
            # Values json cannot encode natively are written as str(value).
            default=str,
        )
687+
623688 async def build_entity_entry (self , entity : EntityItem ) -> EntityItem :
624689 """A method to build an entity entry.
625690
@@ -647,6 +712,9 @@ async def build_entity_entry(self, entity: EntityItem) -> EntityItem:
647712 self .get_entity_relationships_from_graph (entity .entity )
648713 )
649714
715+ if self .single_file is False :
716+ await self .write_entity_to_file (entity )
717+
650718 return entity
651719
652720 @property
@@ -685,7 +753,9 @@ async def create_data_dictionary(self):
685753 if self .single_file :
686754 logging .info ("Saving data dictionary to entities.json" )
687755 with open (
688- f"{ self .output_directory } /entities.json" , "w" , encoding = "utf-8"
756+ f"{ self .output_directory } /schema_store/entities.json" ,
757+ "w" ,
758+ encoding = "utf-8" ,
689759 ) as f :
690760 data_dictionary_dump = [
691761 entity .model_dump (
@@ -699,20 +769,3 @@ async def create_data_dictionary(self):
699769 indent = 4 ,
700770 default = str ,
701771 )
702- else :
703- for entity in data_dictionary :
704- logging .info (f"Saving data dictionary for { entity .entity } " )
705- with open (
706- f"{ self .output_directory } /{ entity .entity } .json" ,
707- "w" ,
708- encoding = "utf-8" ,
709- ) as f :
710- json .dump (
711- entity .model_dump (
712- by_alias = True ,
713- exclude = self .excluded_fields_for_database_engine ,
714- ),
715- f ,
716- indent = 4 ,
717- default = str ,
718- )
0 commit comments