File tree Expand file tree Collapse file tree 1 file changed +3 -3 lines changed
chebai/preprocessing/datasets Expand file tree Collapse file tree 1 file changed +3 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ def processed_file_names_dict(self) -> List[str]:
251251 if train_samples <= self .train_batch_size
252252 else {
253253 f"train_{ i } " : f"train_{ i } .pt"
254- for i in range (( train_samples // self .train_batch_size ) + 1 )
254+ for i in range (train_samples // self .train_batch_size )
255255 }
256256 )
257257 train_batches ["test" ] = "test.pt"
@@ -276,15 +276,15 @@ def _tokenize_batched(self, data):
276276 if d ["features" ] is not None :
277277 batch .append (self .reader .to_data (d ))
278278 if i % self .train_batch_size == 0 and i > 0 :
279- print (f"Saving batch { i // self .train_batch_size } " )
279+ print (f"Generating batch { i // self .train_batch_size - 1 } " )
280280 batch = [b for b in batch if b ["features" ] is not None ]
281281 if self .n_token_limit is not None :
282282 batch = [
283283 b for b in batch if len (b ["features" ]) <= self .n_token_limit
284284 ]
285285 yield batch
286286 batch = []
287- print ("Saving final batch" )
287+ print ("Generating final batch" )
288288 batch = [b for b in batch if b ["features" ] is not None ]
289289 if self .n_token_limit is not None :
290290 batch = [b for b in batch if len (b ["features" ]) <= self .n_token_limit ]
You can’t perform that action at this time.
0 commit comments