File tree Expand file tree Collapse file tree 1 file changed +1
-6
lines changed
Expand file tree Collapse file tree 1 file changed +1
-6
lines changed Original file line number Diff line number Diff line change 1515
1616from fms_fsdp .utils .checkpointing_utils import get_latest
1717
18- # TODO: titan PR adds
19- # TODO: zero-len file asserts/check
20-
2118"""
2219The following distributed dataloaders are designed around 3 main principles:
2320
@@ -1274,10 +1271,8 @@ def __iter__(self):
12741271 newpath = os .path .join (self .datapath , shardid )
12751272 path , reader = self ._get_reader (path , newpath , reader )
12761273 doc = self .filehandler .get (reader , docid , self .drop )
1277- if len (doc ) == 0 :
1278- continue
12791274 doclen = len (doc ) + 1 if self .bos is None else len (doc ) + 2
1280- if doclen >= self .min_length :
1275+ if len ( doc ) > 0 and doclen >= self .min_length :
12811276 n_chunks = math .ceil (doclen / self .chunksize )
12821277 for j in range (residual_chunks ):
12831278 self .chunk_index = j
You can’t perform that action at this time.
0 commit comments