2121)
2222
2323
def _prepare_records(df: pd.DataFrame) -> list[dict]:
    """Convert a DataFrame into row dictionaries that also carry the row label.

    Each returned dictionary maps column name to cell value for one row and
    additionally holds that row's index label under the key ``"Index"``.

    The label is taken straight from ``df.index`` instead of going through
    ``reset_index().rename(columns={"index": "Index"})``: ``reset_index`` only
    names the new column ``"index"`` when the index is unnamed, single-level
    and no ``"index"`` column already exists, so the rename-based approach
    fails to produce an ``"Index"`` key (or renames the wrong column) for
    named indexes, MultiIndexes and frames that contain an ``"index"`` column.

    Args:
        df: Frame to convert. It is not modified.

    Returns:
        One dictionary per row, in row order. The ``"Index"`` key is reserved
        for the index label and overrides a data column of the same name.
        For a MultiIndex the label is the tuple of level values.
    """
    records = df.to_dict("records")
    # to_dict("records") preserves row order, so labels and records line up.
    for label, record in zip(df.index, records):
        record["Index"] = label
    return records
32+
33+
2434def read_entities (
2535 df : pd .DataFrame ,
2636 id_col : str = "id" ,
@@ -35,12 +45,14 @@ def read_entities(
3545 rank_col : str | None = "degree" ,
3646 attributes_cols : list [str ] | None = None ,
3747) -> list [Entity ]:
38- """Read entities from a dataframe."""
39- entities = []
40- for idx , row in df . iterrows ():
41- entity = Entity (
48+ """Read entities from a dataframe using pre-converted records ."""
49+ records = _prepare_records ( df )
50+ return [
51+ Entity (
4252 id = to_str (row , id_col ),
43- short_id = to_optional_str (row , short_id_col ) if short_id_col else str (idx ),
53+ short_id = to_optional_str (row , short_id_col )
54+ if short_id_col
55+ else str (row ["Index" ]),
4456 title = to_str (row , title_col ),
4557 type = to_optional_str (row , type_col ),
4658 description = to_optional_str (row , description_col ),
@@ -57,8 +69,8 @@ def read_entities(
5769 else None
5870 ),
5971 )
60- entities . append ( entity )
61- return entities
72+ for row in records
73+ ]
6274
6375
6476def read_relationships (
@@ -74,12 +86,14 @@ def read_relationships(
7486 text_unit_ids_col : str | None = "text_unit_ids" ,
7587 attributes_cols : list [str ] | None = None ,
7688) -> list [Relationship ]:
77- """Read relationships from a dataframe."""
78- relationships = []
79- for idx , row in df . iterrows ():
80- rel = Relationship (
89+ """Read relationships from a dataframe using pre-converted records ."""
90+ records = _prepare_records ( df )
91+ return [
92+ Relationship (
8193 id = to_str (row , id_col ),
82- short_id = to_optional_str (row , short_id_col ) if short_id_col else str (idx ),
94+ short_id = to_optional_str (row , short_id_col )
95+ if short_id_col
96+ else str (row ["Index" ]),
8397 source = to_str (row , source_col ),
8498 target = to_str (row , target_col ),
8599 description = to_optional_str (row , description_col ),
@@ -95,8 +109,8 @@ def read_relationships(
95109 else None
96110 ),
97111 )
98- relationships . append ( rel )
99- return relationships
112+ for row in records
113+ ]
100114
101115
102116def read_covariates (
@@ -108,12 +122,14 @@ def read_covariates(
108122 text_unit_ids_col : str | None = "text_unit_ids" ,
109123 attributes_cols : list [str ] | None = None ,
110124) -> list [Covariate ]:
111- """Read covariates from a dataframe."""
112- covariates = []
113- for idx , row in df . iterrows ():
114- cov = Covariate (
125+ """Read covariates from a dataframe using pre-converted records ."""
126+ records = _prepare_records ( df )
127+ return [
128+ Covariate (
115129 id = to_str (row , id_col ),
116- short_id = to_optional_str (row , short_id_col ) if short_id_col else str (idx ),
130+ short_id = to_optional_str (row , short_id_col )
131+ if short_id_col
132+ else str (row ["Index" ]),
117133 subject_id = to_str (row , subject_col ),
118134 covariate_type = (
119135 to_str (row , covariate_type_col ) if covariate_type_col else "claim"
@@ -125,8 +141,8 @@ def read_covariates(
125141 else None
126142 ),
127143 )
128- covariates . append ( cov )
129- return covariates
144+ for row in records
145+ ]
130146
131147
132148def read_communities (
@@ -141,12 +157,14 @@ def read_communities(
141157 sub_communities_col : str | None = "sub_community_ids" ,
142158 attributes_cols : list [str ] | None = None ,
143159) -> list [Community ]:
144- """Read communities from a dataframe."""
145- communities = []
146- for idx , row in df . iterrows ():
147- comm = Community (
160+ """Read communities from a dataframe using pre-converted records ."""
161+ records = _prepare_records ( df )
162+ return [
163+ Community (
148164 id = to_str (row , id_col ),
149- short_id = to_optional_str (row , short_id_col ) if short_id_col else str (idx ),
165+ short_id = to_optional_str (row , short_id_col )
166+ if short_id_col
167+ else str (row ["Index" ]),
150168 title = to_str (row , title_col ),
151169 level = to_str (row , level_col ),
152170 entity_ids = to_optional_list (row , entities_col , item_type = str ),
@@ -161,8 +179,8 @@ def read_communities(
161179 else None
162180 ),
163181 )
164- communities . append ( comm )
165- return communities
182+ for row in records
183+ ]
166184
167185
168186def read_community_reports (
@@ -177,12 +195,14 @@ def read_community_reports(
177195 content_embedding_col : str | None = "full_content_embedding" ,
178196 attributes_cols : list [str ] | None = None ,
179197) -> list [CommunityReport ]:
180- """Read community reports from a dataframe."""
181- reports = []
182- for idx , row in df . iterrows ():
183- report = CommunityReport (
198+ """Read community reports from a dataframe using pre-converted records ."""
199+ records = _prepare_records ( df )
200+ return [
201+ CommunityReport (
184202 id = to_str (row , id_col ),
185- short_id = to_optional_str (row , short_id_col ) if short_id_col else str (idx ),
203+ short_id = to_optional_str (row , short_id_col )
204+ if short_id_col
205+ else str (row ["Index" ]),
186206 title = to_str (row , title_col ),
187207 community_id = to_str (row , community_col ),
188208 summary = to_str (row , summary_col ),
@@ -197,8 +217,8 @@ def read_community_reports(
197217 else None
198218 ),
199219 )
200- reports . append ( report )
201- return reports
220+ for row in records
221+ ]
202222
203223
204224def read_text_units (
@@ -212,12 +232,12 @@ def read_text_units(
212232 document_ids_col : str | None = "document_ids" ,
213233 attributes_cols : list [str ] | None = None ,
214234) -> list [TextUnit ]:
215- """Read text units from a dataframe."""
216- text_units = []
217- for idx , row in df . iterrows ():
218- chunk = TextUnit (
235+ """Read text units from a dataframe using pre-converted records ."""
236+ records = _prepare_records ( df )
237+ return [
238+ TextUnit (
219239 id = to_str (row , id_col ),
220- short_id = str (idx ),
240+ short_id = str (row [ "Index" ] ),
221241 text = to_str (row , text_col ),
222242 entity_ids = to_optional_list (row , entities_col , item_type = str ),
223243 relationship_ids = to_optional_list (row , relationships_col , item_type = str ),
@@ -232,5 +252,5 @@ def read_text_units(
232252 else None
233253 ),
234254 )
235- text_units . append ( chunk )
236- return text_units
255+ for row in records
256+ ]
0 commit comments