
Commit 3f8d9b0

Author: chengduo
Merge pull request #11580 from chengduoZH/fix_doc_data_reader
Refine doc of data reader
2 parents: 4b7ae14 + 10cee7e

File tree

2 files changed: +104 −3 lines changed


python/paddle/fluid/data_feeder.py

Lines changed: 97 additions & 0 deletions
@@ -79,6 +79,61 @@ def done(self):
 
 
 class DataFeeder(object):
+    """
+    DataFeeder converts the data returned by a reader into a data
+    structure that can be fed into Executor and ParallelExecutor. The
+    reader usually returns a list of mini-batch data entries. Each data
+    entry in the list is one sample. Each sample is a list or a tuple
+    with one feature or multiple features.
+
+    Simple usage is shown below:
+
+    .. code-block:: python
+
+        place = fluid.CPUPlace()
+        img = fluid.layers.data(name='image', shape=[1, 28, 28])
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
+        result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
+
+    If you want to feed data into the GPU side separately in advance
+    when you use multiple GPUs to train a model, you can use the
+    `decorate_reader` function.
+
+    .. code-block:: python
+
+        place = fluid.CUDAPlace(0)
+        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
+        reader = feeder.decorate_reader(
+            paddle.batch(flowers.train(), batch_size=16))
+
+    Args:
+        feed_list(list): The Variables or the Variables' names that
+            will be fed into the model.
+        place(Place): Indicates whether to feed data into the CPU or
+            the GPU. To feed data into the GPU, use
+            `fluid.CUDAPlace(i)` (`i` is the GPU id); to feed data into
+            the CPU, use `fluid.CPUPlace()`.
+        program(Program): The Program that the data will be fed into.
+            If program is None, default_main_program() will be used.
+            Default None.
+
+    Raises:
+        ValueError: If some Variable is not in this Program.
+
+    Examples:
+        .. code-block:: python
+
+            # ...
+            place = fluid.CPUPlace()
+            feed_list = [
+                main_program.global_block().var(var_name) for var_name in feed_vars_name
+            ]  # feed_vars_name is a list of variables' names.
+            feeder = fluid.DataFeeder(feed_list, place)
+            for data in reader():
+                outs = exe.run(program=main_program,
+                               feed=feeder.feed(data))
+    """
+
     def __init__(self, feed_list, place, program=None):
         self.feed_dtypes = []
         self.feed_names = []
@@ -108,6 +163,16 @@ def __init__(self, feed_list, place, program=None):
         self.place = place
 
     def feed(self, iterable):
+        """
+        According to feed_list and iterable, converts the input into a
+        data structure that can be fed into Executor and
+        ParallelExecutor.
+
+        Args:
+            iterable(list|tuple): the input data.
+
+        Returns:
+            dict: the result of conversion.
+        """
         converter = []
         for lod_level, shape, dtype in six.zip(
                 self.feed_lod_level, self.feed_shapes, self.feed_dtypes):
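
For reference, a minimal sketch of how `feed` might be called; the variable names and shapes below are hypothetical, assuming the fluid API shown in this diff:

.. code-block:: python

    import paddle.fluid as fluid

    # Two input Variables; names and shapes here are illustrative only.
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')

    feeder = fluid.DataFeeder(feed_list=[x, y], place=fluid.CPUPlace())

    # Each entry in the mini-batch is one sample: a tuple with one
    # value per Variable in feed_list.
    minibatch = [([0.5], [1.0]), ([0.1], [0.2])]
    feed_dict = feeder.feed(minibatch)  # dict of name -> tensor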
@@ -130,6 +195,20 @@ def feed(self, iterable):
         return ret_dict
 
     def feed_parallel(self, iterable, num_places=None):
+        """
+        Takes multiple mini-batches. Each mini-batch will be fed to
+        each device in advance.
+
+        Args:
+            iterable(list|tuple): the input data.
+            num_places(int): the number of devices. Default None.
+
+        Returns:
+            dict: the result of conversion.
+
+        Notes:
+            The number of devices and the number of mini-batches must
+            be the same.
+        """
         if isinstance(self.place, core.CUDAPlace):
             places = [
                 core.CUDAPlace(i)
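
A hedged sketch of `feed_parallel`, assuming two CUDA devices are available; per the Notes above, the number of mini-batches passed must equal `num_places`:

.. code-block:: python

    import paddle.fluid as fluid

    img = fluid.layers.data(name='image', shape=[1, 28, 28])
    feeder = fluid.DataFeeder(feed_list=[img], place=fluid.CUDAPlace(0))

    # One mini-batch per device: num_places=2, so exactly two batches.
    batch0 = [([0.0] * 784,)]
    batch1 = [([1.0] * 784,)]
    parallel_feed = feeder.feed_parallel([batch0, batch1], num_places=2)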
@@ -168,6 +247,24 @@ def decorate_reader(self,
                         multi_devices,
                         num_places=None,
                         drop_last=True):
+        """
+        Converts the data returned by the reader into multiple
+        mini-batches. Each mini-batch will be fed to each device.
+
+        Args:
+            reader(fun): the reader that yields the input data.
+            multi_devices(bool): whether to feed the data to multiple
+                devices.
+            num_places(int): the number of devices. Default None.
+            drop_last(bool): whether to drop the last incomplete batch.
+                Default True.
+
+        Returns:
+            fun: the decorated reader.
+
+        Raises:
+            ValueError: If drop_last is False and the last data batch
+                cannot fit across the devices.
+        """
+
         def __reader_creator__():
             if not multi_devices:
                 for item in reader():
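
A hedged usage sketch of `decorate_reader`, expanding the example in the class docstring above; the flowers dataset import path is an assumption:

.. code-block:: python

    import paddle
    import paddle.fluid as fluid
    import paddle.dataset.flowers as flowers

    data = fluid.layers.data(name='data', shape=[3, 224, 224])
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(place=fluid.CUDAPlace(0),
                              feed_list=[data, label])

    # Wrap a batched reader so each device receives its own mini-batch;
    # with drop_last=True a final batch that cannot fill every device
    # is dropped instead of raising ValueError.
    reader = feeder.decorate_reader(
        paddle.batch(flowers.train(), batch_size=16),
        multi_devices=True)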

python/paddle/fluid/executor.py

Lines changed: 7 additions & 3 deletions
@@ -135,14 +135,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name):
 
 def fetch_var(name, scope=None, return_numpy=True):
     """
-    Fetch the value of the variable with the given name from the given scope
+    Fetch the value of the variable with the given name from the
+    given scope.
+
     Args:
         name(str): name of the variable. Typically, only persistable variables
             can be found in the scope used for running the program.
         scope(core.Scope|None): scope object. It should be the scope that
             you pass to Executor.run() when running your program.
-            If None, global_scope() will be used.
-        return_numpy(bool): whether convert the tensor to numpy.ndarray
+            If None, global_scope() will be used. Default None.
+        return_numpy(bool): whether to convert the tensor to a
+            numpy.ndarray. Default True.
+
     Returns:
         LodTensor|numpy.ndarray
     """
