Skip to content

Commit 4a8f821

Browse files
committed
deepgo2 migration: exp_annotations not needed
- prop_annotations contains both direct and transitive annotations
1 parent fd6dd01 commit 4a8f821

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

chebai/preprocessing/migration/deep_go/migrate_deep_go_2_data.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,8 @@ def _extract_required_data_from_splits(self) -> pd.DataFrame:
213213
"proteins",
214214
"accessions",
215215
"sequences",
216-
# https://github.com/bio-ontology-research-group/deepgo2/blob/main/gendata/uni2pandas.py#L45-L58
217-
"exp_annotations", # Directly associated GO ids
218216
# https://github.com/bio-ontology-research-group/deepgo2/blob/main/gendata/uni2pandas.py#L60-L69
219-
"prop_annotations", # Transitively associated GO ids
217+
"prop_annotations", # Directly and transitively associated GO ids
220218
"esm2",
221219
]
222220

@@ -228,10 +226,8 @@ def _extract_required_data_from_splits(self) -> pd.DataFrame:
228226
],
229227
ignore_index=True,
230228
)
231-
new_df["go_ids"] = new_df.apply(
232-
lambda row: self.extract_go_id(row["exp_annotations"])
233-
+ self.extract_go_id(row["prop_annotations"]),
234-
axis=1,
229+
new_df["go_ids"] = new_df["prop_annotations"].apply(
230+
lambda x: self.extract_go_id(x)
235231
)
236232

237233
data_df = pd.DataFrame(
@@ -270,7 +266,7 @@ def _generate_labels(self, data_df: pd.DataFrame) -> pd.DataFrame:
270266
"""
271267
print("Generating labels based on terms.pkl file.......")
272268
parsed_go_ids: pd.Series = self._terms_df["gos"].apply(
273-
lambda gos: DeepGO2MigratedData._parse_go_id(gos)
269+
DeepGO2MigratedData._parse_go_id
274270
)
275271
all_go_ids_list = parsed_go_ids.values.tolist()
276272
self._classes = all_go_ids_list

0 commit comments

Comments
 (0)