@@ -43,7 +43,7 @@ async def run_workflow(
4343 config .root_dir , summarization_llm_settings
4444 )
4545
46- entities , relationships = await extract_graph (
46+ entities , relationships , raw_entities , raw_relationships = await extract_graph (
4747 text_units = text_units ,
4848 callbacks = context .callbacks ,
4949 cache = context .cache ,
@@ -58,6 +58,12 @@ async def run_workflow(
5858 await write_table_to_storage (entities , "entities" , context .storage )
5959 await write_table_to_storage (relationships , "relationships" , context .storage )
6060
61+ if config .snapshots .raw_graph :
62+ await write_table_to_storage (raw_entities , "raw_entities" , context .storage )
63+ await write_table_to_storage (
64+ raw_relationships , "raw_relationships" , context .storage
65+ )
66+
6167 return WorkflowFunctionOutput (
6268 result = {
6369 "entities" : entities ,
@@ -76,7 +82,7 @@ async def extract_graph(
7682 entity_types : list [str ] | None = None ,
7783 summarization_strategy : dict [str , Any ] | None = None ,
7884 summarization_num_threads : int = 4 ,
79- ) -> tuple [pd .DataFrame , pd .DataFrame ]:
85+ ) -> tuple [pd .DataFrame , pd .DataFrame , pd . DataFrame , pd . DataFrame ]:
8086 """All the steps to create the base entity graph."""
8187 # this returns a graph for each text unit, to be merged later
8288 extracted_entities , extracted_relationships = await extractor (
@@ -103,6 +109,10 @@ async def extract_graph(
103109 callbacks .error (error_msg )
104110 raise ValueError (error_msg )
105111
112+ # keep as-is copies of the extracted entities and relationships, taken before any summarization
113+ raw_entities = extracted_entities .copy ()
114+ raw_relationships = extracted_relationships .copy ()
115+
106116 entities , relationships = await get_summarized_entities_relationships (
107117 extracted_entities = extracted_entities ,
108118 extracted_relationships = extracted_relationships ,
@@ -112,7 +122,7 @@ async def extract_graph(
112122 summarization_num_threads = summarization_num_threads ,
113123 )
114124
115- return (entities , relationships )
125+ return (entities , relationships , raw_entities , raw_relationships )
116126
117127
118128async def get_summarized_entities_relationships (
0 commit comments