|
| 1 | +#! /usr/bin/python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +from .dataset import Dataset, IterableDataset |
| 4 | +from .sampler import Sampler, SequentialSampler, RandomSampler, BatchSampler, SubsetRandomSampler, WeightedRandomSampler |
| 5 | +from .utils import _DatasetKind, _InfiniteIterableSampler |
| 6 | +from . import utils |
| 7 | +import math |
| 8 | +__all__ = [ |
| 9 | + 'DataLoader', |
| 10 | +] |
| 11 | + |
| 12 | + |
class DataLoader(object):
    """Data loader. Combines a dataset and a sampler, and provides an iterable over the given dataset.

    The :class:`tensorlayerx.dataflow.DataLoader` supports both map-style and
    iterable-style datasets with single- or multi-process loading, customizing
    loading order and optional automatic batching.

    Parameters
    -----------
    dataset : Dataset
        dataset from which to load the data.
    batch_size : int
        how many samples per batch to load, default is 1.
    shuffle : bool
        set to ``True`` to have the data reshuffled at every epoch, default is ``False``.
    drop_last : bool
        set to ``True`` to drop the last incomplete batch,
        if the dataset size is not divisible by the batch size. If ``False`` and
        the size of dataset is not divisible by the batch size, then the last batch
        will be smaller. default is ``False``.
    sampler : Sampler
        defines the strategy to draw samples from the dataset. If specified, `shuffle` must not be specified.
    batch_sampler : Sampler
        returns a batch of indices at a time. If specified, `shuffle`, `batch_size`, `drop_last`, `sampler` must not be specified.
    num_workers : int
        how many subprocesses to use for data loading. ``0`` means that the data will be loaded in single process. default is ``0``.
    collate_fn : callable
        merges a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a map-style dataset.
    time_out : numeric
        if positive, the timeout value for collecting a batch from workers. Should always be non-negative. default is ``0``.
    worker_init_fn : callable
        If not ``None``, this will be called on each worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as
        input, after seeding and before data loading. default is ``None``.
    prefetch_factor : int
        Number of samples loaded in advance by each worker.
        ``2`` means there will be a total of 2 * num_workers samples prefetched across all workers. default is ``2``
    persistent_workers : bool
        If ``True``, the data loader will not shutdown the worker processes after a dataset has been consumed once.
        This allows to maintain the workers `Dataset` instances alive. default is ``False``.

    Raises
    -------
    ValueError
        If the option combination is inconsistent (see the per-option notes above),
        or if ``num_workers``/``time_out`` is negative.
    """

    def __init__(
        self,
        dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
        sampler=None,
        batch_sampler=None,
        num_workers=0,
        collate_fn=None,
        time_out=0,
        worker_init_fn=None,
        prefetch_factor=2,
        persistent_workers=False,
    ):
        self.dataset = dataset
        # Validate worker configuration up front so misconfiguration fails fast.
        # NOTE: this was previously an `assert`, which is silently stripped when
        # Python runs with `-O`; an explicit ValueError always fires.
        if num_workers < 0:
            raise ValueError("num_workers should be a non_negative integer")
        if num_workers == 0 and prefetch_factor != 2:
            raise ValueError("prefetch_factor option should not be specified, when num_workers is 0.")
        if persistent_workers and num_workers == 0:
            raise ValueError('persistent_workers option needs num_workers > 0')
        # The docstring requires a non-negative timeout; enforce it instead of
        # letting a negative value reach the worker machinery.
        if time_out < 0:
            raise ValueError("time_out option should be non-negative")
        self.num_workers = num_workers
        self.prefetch_factor = prefetch_factor
        self.time_out = time_out
        self.worker_init_fn = worker_init_fn

        if isinstance(dataset, IterableDataset):
            # Iterable-style datasets yield samples themselves, so any
            # user-supplied ordering option is meaningless and rejected.
            self._dataset_kind = _DatasetKind.Iter
            if shuffle is not False:
                raise ValueError("IterableDataset only support 'shuffle=False', but got shuffle={}.".format(shuffle))
            elif sampler is not None:
                raise ValueError("IterableDataset only support 'sampler=None', but got sampler={}.".format(sampler))
            elif batch_sampler is not None:
                raise ValueError(
                    "IterableDataset only support 'batch_sampler=None', "
                    "but got batch_sampler={}.".format(batch_sampler)
                )
        else:
            self._dataset_kind = _DatasetKind.Map

        if sampler is not None and shuffle:
            raise ValueError("sampler option is mutually exclusive with shuffle option.")

        if batch_sampler is not None:
            # batch_sampler takes full control of batching; every other
            # batching/ordering option must be left at its default.
            if batch_size != 1 or shuffle or sampler is not None or drop_last:
                raise ValueError(
                    "batch_size, shuffle, sampler, drop_last should not be set, when batch_sampler is specified."
                )
            batch_size = None
            drop_last = False
        elif batch_size is None:
            # batch_size=None disables automatic batching entirely.
            if drop_last:
                raise ValueError("drop_last should be False, when batch_size is None.")

        if sampler is None:
            if self._dataset_kind == _DatasetKind.Iter:
                # Iterable datasets have no index space; use a placeholder
                # sampler that yields indefinitely.
                sampler = _InfiniteIterableSampler()
            else:
                sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)

        if batch_size is not None and batch_sampler is None:
            # Automatic batching: wrap the element sampler in a BatchSampler.
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.batch_size = batch_size
        self.drop_last = drop_last
        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self._iterator = None

        if collate_fn is None:
            # Batched loading merges a list of samples into a mini-batch;
            # unbatched loading only converts a single sample.
            collate_fn = utils.default_collate if self._is_batch else utils.default_convert
        self.collate_fn = collate_fn
        self.persistent_workers = persistent_workers

    @property
    def _is_batch(self):
        # Automatic batching is in effect iff a batch_sampler exists
        # (either user-supplied or created in __init__).
        return self.batch_sampler is not None

    @property
    def _index_sampler(self):
        # The sampler actually driving iteration: batch indices when
        # batching, single indices otherwise.
        if self._is_batch:
            return self.batch_sampler
        else:
            return self.sampler

    def _get_iterator(self):
        # Choose single-process vs. multi-process loading based on num_workers.
        if self.num_workers == 0:
            return utils._SingleProcessDataLoaderIter(self)
        else:
            return utils._MultiProcessingDataLoaderIter(self)

    def __iter__(self):
        if self.persistent_workers and self.num_workers > 0:
            # Reuse the multi-process iterator across epochs so worker
            # processes (and their Dataset copies) stay alive.
            if self._iterator is None:
                self._iterator = self._get_iterator()
            else:
                self._iterator._reset(self)
            return self._iterator
        else:
            return self._get_iterator()

    def __len__(self):
        # For iterable-style datasets, delegate to len(dataset) (which the
        # dataset may or may not implement) and round per the batch settings;
        # for map-style datasets, the driving sampler knows the length.
        if self._dataset_kind == _DatasetKind.Iter:
            length = len(self.dataset)
            if self.batch_size is not None:
                if self.drop_last:
                    length = length // self.batch_size
                else:
                    length = math.ceil(length / self.batch_size)
            return length
        else:
            return len(self._index_sampler)