Skip to content

Commit 383b210

Browse files
committed
Fix: make max sequence length accessible in the name property
1 parent 710d703 commit 383b210

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

chebai/preprocessing/datasets/go_uniprot.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,13 +73,14 @@ class _GOUniProtDataExtractor(_DynamicDataset, ABC):
7373

7474
def __init__(self, **kwargs):
7575
self.go_branch: str = self._get_go_branch(**kwargs)
76-
super(_GOUniProtDataExtractor, self).__init__(**kwargs)
7776

7877
self.max_sequence_length: int = int(kwargs.get("max_sequence_length", 1002))
7978
assert (
8079
self.max_sequence_length >= 1
8180
), "Max sequence length should be greater than or equal to 1."
8281

82+
super(_GOUniProtDataExtractor, self).__init__(**kwargs)
83+
8384
if self.reader.n_gram is not None:
8485
assert self.max_sequence_length >= self.reader.n_gram, (
8586
f"max_sequence_length ({self.max_sequence_length}) must be greater than "
@@ -415,7 +416,7 @@ def _get_swiss_to_go_mapping(self) -> pd.DataFrame:
415416
# To consider only manually-annotated swiss data
416417
continue
417418

418-
if not record.sequence or record.sequence > self.max_sequence_length:
419+
if not record.sequence or len(record.sequence) > self.max_sequence_length:
419420
# Consider protein with only sequence representation and seq. length not greater than max seq. length
420421
continue
421422

0 commit comments

Comments
 (0)