Skip to content

Commit 7cf059c

Browse files
committed
Revert "add abstract DataReader for proteins repo to override token path"
This reverts commit a8823c8.
1 parent cd92ca5 commit 7cf059c

File tree

1 file changed

+2
-11
lines changed

1 file changed

+2
-11
lines changed

chebai_proteins/preprocessing/reader.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import os
2-
from abc import ABC
32
from pathlib import Path
43
from typing import List, Optional, Tuple
54
from urllib.error import HTTPError
@@ -13,15 +12,7 @@
1312
from esm.pretrained import load_model_and_alphabet_core
1413

1514

16-
class _ChebaiProteinsDataReader(DataReader, ABC):
17-
def __init__(self, *args, **kwargs):
18-
super().__init__(*args, **kwargs)
19-
# This to override the token directory path which points to `chebai` repo instead of `chebai-proteins` to
20-
# search for tokens.txt files for readers defined in `chebai-proteins` repository.
21-
self.dirname = os.path.dirname(__file__)
22-
23-
24-
class ProteinDataReader(TokenIndexerReader, _ChebaiProteinsDataReader):
15+
class ProteinDataReader(TokenIndexerReader):
2516
"""
2617
Data reader for protein sequences using amino acid tokens. This class processes raw protein sequences into a format
2718
suitable for model input by tokenizing them and assigning unique indices to each token.
@@ -131,7 +122,7 @@ def _read_data(self, raw_data: str) -> List[int]:
131122
return [self._get_token_index(aa) for aa in raw_data]
132123

133124

134-
class ESM2EmbeddingReader(_ChebaiProteinsDataReader):
125+
class ESM2EmbeddingReader(DataReader):
135126
"""
136127
A data reader to process protein sequences using the ESM2 model for embeddings.
137128

0 commit comments

Comments
 (0)