55from itertools import islice
66from typing import Any , Dict , List , Optional
77
8- import deepsmiles
9- import selfies as sf
108from pysmiles .read_smiles import _tokenize
11- from transformers import RobertaTokenizerFast
129
1310from chebai .preprocessing .collate import DefaultCollator , RaggedCollator
1411
@@ -205,6 +202,8 @@ class DeepChemDataReader(ChemDataReader):
205202 """
206203
207204 def __init__ (self , * args , ** kwargs ):
205+ import deepsmiles
206+
208207 super ().__init__ (* args , ** kwargs )
209208 self .converter = deepsmiles .Converter (rings = True , branches = True )
210209 self .error_count = 0
@@ -279,6 +278,8 @@ def __init__(
279278 vsize : int = 4000 ,
280279 ** kwargs ,
281280 ):
281+ from transformers import RobertaTokenizerFast
282+
282283 super ().__init__ (* args , ** kwargs )
283284 self .tokenizer = RobertaTokenizerFast .from_pretrained (
284285 data_path , max_len = max_len
@@ -312,6 +313,8 @@ def __init__(
312313 vsize : int = 4000 ,
313314 ** kwargs ,
314315 ):
316+ import selfies as sf
317+
315318 super ().__init__ (* args , ** kwargs )
316319 self .error_count = 0
317320 sf .set_semantic_constraints ("hypervalent" )
@@ -323,6 +326,8 @@ def name(cls) -> str:
323326
324327 def _read_data (self , raw_data : str ) -> Optional [List [int ]]:
325328 """Read and tokenize raw data using SELFIES."""
329+ import selfies as sf
330+
326331 try :
327332 tokenized = sf .split_selfies (sf .encoder (raw_data .strip (), strict = True ))
328333 tokenized = [self ._get_token_index (v ) for v in tokenized ]
0 commit comments