1 file changed: +3 -3 lines changed
chebai/preprocessing/datasets
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ class PubChemBatched(PubChem):
217217
218218 READER : Type [dr .ChemDataReader ] = dr .ChemDataReader
219219
220- def __init__ (self , train_batch_size = 10_000_000 , * args , ** kwargs ):
220+ def __init__ (self , train_batch_size = 1_000_000 , * args , ** kwargs ):
221221 super (PubChemBatched , self ).__init__ (* args , ** kwargs )
222222 self .curr_epoch = 0
223223 self .train_batch_size = train_batch_size
@@ -275,8 +275,8 @@ def _tokenize_batched(self, data):
275275 for i , d in enumerate (tqdm .tqdm (data , total = len (data ))):
276276 if d ["features" ] is not None :
277277 batch .append (self .reader .to_data (d ))
278- if i % 1_000_000 == 0 and i > 0 :
279- print (f"Saving batch { i // 1_000_000 } " )
278+ if i % self . train_batch_size == 0 and i > 0 :
279+ print (f"Saving batch { i // self . train_batch_size } " )
280280 batch = [b for b in batch if b ["features" ] is not None ]
281281 if self .n_token_limit is not None :
282282 batch = [
You can’t perform that action at this time.
0 commit comments