@@ -456,52 +456,122 @@ def py_reader(capacity,
456
456
name = None ,
457
457
use_double_buffer = True ):
458
458
"""
459
- Create a reader and blocking queue for data feeding in Python
459
+ Create a python reader for data feeding in Python
460
460
461
- This layer returns a Reader Variable and a BlockingQueue.
462
- The BlockingQueue provides `push()` method to push a `LoDTensorArray`
463
- object into the queue in Python side. In C++ side, the Reader
464
- Variable would invoke `pop()` method of the queue to retrieve the
465
- feeding data. The process of feeding data in Python side and fetching
466
- data in C++ side can run in parallel. The BlockingQueue should be closed
467
- using `close()` method when unused.
461
+ This layer returns a Reader Variable.
462
+ The Reader provides :code:`decorate_paddle_reader` and
463
+ :code:`decorate_tensor_provider` to set a Python generator as the data
464
+ source in Python side. When :code:`Executor::Run()` is invoked in C++
465
+ side, the data from the generator would be read automatically. Unlike
466
+ :code:`DataFeeder.feed()`, the data reading process and
467
+ :code:`Executor::Run()` process can run in parallel using
468
+ :code:`py_reader`. The :code:`start()` method of the Reader should be
469
+ called when each pass begins, while the :code:`reset()` method should be
470
+ called when the pass ends and :code:`fluid.core.EOFException` is raised.
471
+ Note that :code:`Program.clone()` method cannot clone :code:`py_reader`.
468
472
469
473
Args:
470
- use_double_buffer(bool): Whether use double buffer or not.
471
- capacity(int): The maximum capacity of the BlockingQueue.
474
+ capacity(int): The buffer capacity maintained by :code:`py_reader`.
472
475
shapes(list|tuple): List of tuples which declaring data shapes.
473
476
dtypes(list|tuple): List of strs which declaring data type.
474
477
lod_levels(list|tuple): List of ints which declaring data lod_level.
475
478
name(basestring): The prefix of the Python queue name and Reader name. None will
476
479
be generated automatically.
480
+ use_double_buffer(bool): Whether use double buffer or not.
477
481
478
482
Returns:
479
- tuple(Variable, BlockingQueue):
480
- A Reader Variable from which we can get feeding data.
481
-
482
- A BlockingQueue object for data feeding.
483
+ Variable: A Reader from which we can get feeding data.
483
484
484
485
Examples:
485
486
486
- .. code-block:: python
487
+ 1. The basic usage of :code:`py_reader` is as follows:
487
488
488
- reader, queue = fluid.layers.py_reader(
489
- capacity=10,
490
- shapes=[[-1,3,224,224], [-1,1]],
491
- dtypes=['float32', 'int64'])
492
- # Via the reader, we can use 'read_file' layer to get data:
493
- image, label = fluid.layers.read_file(reader)
494
-
495
- # Via the blocking queue, we can feed data using threads
496
- def feed_data(queue, feed_images, feed_labels):
497
- for feed_image, feed_label in zip(feed_images, feed_labels):
498
- data = core.LoDTensorArray()
499
- data.append(feed_image)
500
- data.append(feed_label)
501
- queue.push(data)
502
-
503
- thread = threading.Thread(target=feed_data, args=(queue, feed_images, feed_labels))
504
- thread.start()
489
+ >>> import paddle.v2
490
+ >>> import paddle.fluid as fluid
491
+ >>> import paddle.dataset.mnist as mnist
492
+ >>>
493
+ >>> reader = fluid.layers.py_reader(capacity=64,
494
+ >>> shapes=[(-1,3,224,224), (-1,1)],
495
+ >>> dtypes=['float32', 'int64'])
496
+ >>> reader.decorate_paddle_reader(
497
+ >>>     paddle.v2.reader.shuffle(paddle.batch(mnist.train(), 512)))
498
+ >>>
499
+ >>> img, label = fluid.layers.read_file(reader)
500
+ >>> loss = network(img, label) # some network definition
501
+ >>>
502
+ >>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
503
+ >>>
504
+ >>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
505
+ >>> for epoch_id in range(10):
506
+ >>> reader.start()
507
+ >>> try:
508
+ >>> while True:
509
+ >>> exe.run(fetch_list=[loss.name])
510
+ >>> except fluid.core.EOFException:
511
+ >>> reader.reset()
512
+
513
+ 2. When training and testing are both performed, two different
514
+ :code:`py_reader` should be created with different names, e.g.:
515
+
516
+ >>> import paddle.v2
517
+ >>> import paddle.fluid as fluid
518
+ >>> import paddle.dataset.mnist as mnist
519
+ >>>
520
+ >>> def network(reader):
521
+ >>> img, label = fluid.layers.read_file(reader)
522
+ >>> # Here, we omitted the network definition
523
+ >>> return loss
524
+ >>>
525
+ >>> train_reader = fluid.layers.py_reader(capacity=64,
526
+ >>> shapes=[(-1,3,224,224), (-1,1)],
527
+ >>> dtypes=['float32', 'int64'],
528
+ >>> name='train_reader')
529
+ >>> train_reader.decorate_paddle_reader(
530
+ >>>     paddle.v2.reader.shuffle(paddle.batch(mnist.train(), 512)))
531
+ >>>
532
+ >>> test_reader = fluid.layers.py_reader(capacity=32,
533
+ >>> shapes=[(-1,3,224,224), (-1,1)],
534
+ >>> dtypes=['float32', 'int64'],
535
+ >>> name='test_reader')
536
+ >>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
537
+ >>>
538
+ >>> # Create train_main_prog and train_startup_prog
539
+ >>> train_main_prog = fluid.Program()
540
+ >>> train_startup_prog = fluid.Program()
541
+ >>> with fluid.program_guard(train_main_prog, train_startup_prog):
542
+ >>> # Use fluid.unique_name.guard() to share parameters with test program
543
+ >>> with fluid.unique_name.guard():
544
+ >>> train_loss = network(train_reader) # some network definition
545
+ >>> adam = fluid.optimizer.Adam(learning_rate=0.01)
546
+ >>>         adam.minimize(train_loss)
547
+ >>>
548
+ >>> # Create test_main_prog and test_startup_prog
549
+ >>> test_main_prog = fluid.Program()
550
+ >>> test_startup_prog = fluid.Program()
551
+ >>> with fluid.program_guard(test_main_prog, test_startup_prog):
552
+ >>> # Use fluid.unique_name.guard() to share parameters with train program
553
+ >>> with fluid.unique_name.guard():
554
+ >>> test_loss = network(test_reader)
555
+ >>>
556
+ >>> fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
557
+ >>> fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
558
+ >>>
559
+ >>> train_exe = fluid.ParallelExecutor(use_cuda=True,
560
+ >>> loss_name=train_loss.name, main_program=train_main_prog)
561
+ >>> test_exe = fluid.ParallelExecutor(use_cuda=True,
562
+ >>> loss_name=test_loss.name, main_program=test_main_prog)
563
+ >>> for epoch_id in range(10):
564
+ >>> try:
565
+ >>> while True:
566
+ >>> train_exe.run(fetch_list=[train_loss.name])
567
+ >>> except fluid.core.EOFException:
568
+ >>> train_reader.reset()
569
+ >>>
570
+ >>> try:
571
+ >>> while True:
572
+ >>> test_exe.run(fetch_list=[test_loss.name])
573
+ >>> except fluid.core.EOFException:
574
+ >>> test_reader.reset()
505
575
"""
506
576
dtypes = [convert_np_dtype_to_dtype_ (dt ) for dt in dtypes ]
507
577
shape_concat = []
0 commit comments