Skip to content

Commit c73b0fb

Browse files
committed
fix number of expected pubchem batches
1 parent 8ee5c4b commit c73b0fb

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

chebai/preprocessing/datasets/pubchem.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ def processed_file_names_dict(self) -> List[str]:
251251
if train_samples <= self.train_batch_size
252252
else {
253253
f"train_{i}": f"train_{i}.pt"
254-
for i in range((train_samples // self.train_batch_size) + 1)
254+
for i in range(train_samples // self.train_batch_size)
255255
}
256256
)
257257
train_batches["test"] = "test.pt"
@@ -276,15 +276,15 @@ def _tokenize_batched(self, data):
276276
if d["features"] is not None:
277277
batch.append(self.reader.to_data(d))
278278
if i % self.train_batch_size == 0 and i > 0:
279-
print(f"Saving batch {i // self.train_batch_size}")
279+
print(f"Generating batch {i // self.train_batch_size - 1}")
280280
batch = [b for b in batch if b["features"] is not None]
281281
if self.n_token_limit is not None:
282282
batch = [
283283
b for b in batch if len(b["features"]) <= self.n_token_limit
284284
]
285285
yield batch
286286
batch = []
287-
print("Saving final batch")
287+
print("Generating final batch")
288288
batch = [b for b in batch if b["features"] is not None]
289289
if self.n_token_limit is not None:
290290
batch = [b for b in batch if len(b["features"]) <= self.n_token_limit]

0 commit comments

Comments
 (0)