Skip to content

Commit a919fc4

Browse files
committed
Some cleanup (no continue)
1 parent 5b76df2 commit a919fc4

File tree

1 file changed

+1
-6
lines changed

1 file changed

+1
-6
lines changed

fms_fsdp/utils/dataset_utils.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,6 @@
15 15

16 16
from fms_fsdp.utils.checkpointing_utils import get_latest
17 17

18-
# TODO: titan PR adds
19-
# TODO: zero-len file asserts/check
20-
21 18
"""
22 19
The following distributed dataloaders are designed around 3 main principles:
23 20
@@ -1274,10 +1271,8 @@ def __iter__(self):
1274 1271
newpath = os.path.join(self.datapath, shardid)
1275 1272
path, reader = self._get_reader(path, newpath, reader)
1276 1273
doc = self.filehandler.get(reader, docid, self.drop)
1277-
if len(doc) == 0:
1278-
continue
1279 1274
doclen = len(doc) + 1 if self.bos is None else len(doc) + 2
1280-
if doclen >= self.min_length:
1275+
if len(doc) > 0 and doclen >= self.min_length:
1281 1276
n_chunks = math.ceil(doclen / self.chunksize)
1282 1277
for j in range(residual_chunks):
1283 1278
self.chunk_index = j

0 commit comments

Comments
 (0)