Skip to content

Commit 831f70d

Browse files
committed
scope: filter out sequences longer than the given maximum length
1 parent 58bcf05 commit 831f70d

File tree

1 file changed

+3
-1
lines changed
  • chebai/preprocessing/datasets/scope

1 file changed

+3
-1
lines changed

chebai/preprocessing/datasets/scope/scope.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,10 +72,12 @@ def __init__(
7272
self,
7373
scope_version: str,
7474
scope_version_train: Optional[str] = None,
75+
max_sequence_len: int = 1000,
7576
**kwargs,
7677
):
7778
self.scope_version: str = scope_version
7879
self.scope_version_train: str = scope_version_train
80+
self.max_sequence_len: int = max_sequence_len
7981

8082
super(_SCOPeDataExtractor, self).__init__(**kwargs)
8183

@@ -545,7 +547,7 @@ def _parse_pdb_sequence_file(self) -> pd.DataFrame:
545547
os.path.join(self.scope_root_dir, self.raw_file_names_dict["PDB"]), "fasta"
546548
):
547549

548-
if not record.seq:
550+
if not record.seq or len(record.seq) > self.max_sequence_len:
549551
continue
550552

551553
pdb_id, chain = record.id.split("_")

0 commit comments

Comments
 (0)