Revert "add abstract DataReader for proteins repo to override token path"

aditya0by0 · aditya0by0 · commit 7cf059c09a95 · 2025-05-12T20:00:48.000+02:00
This reverts commit a8823c8.
diff --git a/chebai_proteins/preprocessing/reader.py b/chebai_proteins/preprocessing/reader.py
@@ -1,5 +1,4 @@
 import os
-from abc import ABC
 from pathlib import Path
 from typing import List, Optional, Tuple
 from urllib.error import HTTPError
@@ -13,15 +12,7 @@
 from esm.pretrained import load_model_and_alphabet_core
 
 
-class _ChebaiProteinsDataReader(DataReader, ABC):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # This to override the token directory path which points to `chebai` repo instead of `chebai-proteins` to
-        # search for tokens.txt files for readers defined in `chebai-proteins` repository.
-        self.dirname = os.path.dirname(__file__)
-
-
-class ProteinDataReader(TokenIndexerReader, _ChebaiProteinsDataReader):
+class ProteinDataReader(TokenIndexerReader):
     """
     Data reader for protein sequences using amino acid tokens. This class processes raw protein sequences into a format
     suitable for model input by tokenizing them and assigning unique indices to each token.
@@ -131,7 +122,7 @@ def _read_data(self, raw_data: str) -> List[int]:
         return [self._get_token_index(aa) for aa in raw_data]
 
 
-class ESM2EmbeddingReader(_ChebaiProteinsDataReader):
+class ESM2EmbeddingReader(DataReader):
     """
     A data reader to process protein sequences using the ESM2 model for embeddings.