File tree Expand file tree Collapse file tree 1 file changed +5
-6
lines changed
src/lightning/data/streaming Expand file tree Collapse file tree 1 file changed +5
-6
lines changed Original file line number Diff line number Diff line change @@ -27,8 +27,11 @@ class CombinedStreamingDataset(IterableDataset):
27
27
"""The `CombinedStreamingDataset` enables to stream data from multiple StreamingDataset with the sampling ratio of
28
28
your choice.
29
29
30
- Additionally, the `CombinedStreamingDataset` keeps track of the number of
31
- samples fetched to enable resumability of the datasets.
30
+ Additionally, the `CombinedStreamingDataset` keeps track of the number of samples fetched to enable resumability
31
+ of the datasets.
32
+
33
+ Note that due to the random sampling, the number of samples returned from the iterator is variable and a function
34
+ of the given seed. The combined dataset will raise a StopIteration as soon as any of the datasets is exhausted.
32
35
33
36
"""
34
37
@@ -71,10 +74,6 @@ def _set_use_streaming_dataloader(self, use_streaming_dataloader: bool) -> None:
71
74
# Used to prevent returning num_samples_yielded when using PyTorch DataLoader
72
75
self ._use_streaming_dataloader = use_streaming_dataloader
73
76
74
- def __len__ (self ) -> int :
75
- assert self ._weights
76
- return int (min ([1 / w * len (d ) for w , d in zip (self ._weights , self ._datasets ) if w > 0 ]))
77
-
78
77
def __iter__ (self ) -> Iterator [Any ]:
79
78
assert self ._weights
80
79
You can’t perform that action at this time.
0 commit comments