@@ -39,15 +39,16 @@ In the backward pass
This implementation allows writing mixed-device programs like this:

```python
- # get embedding feature on CPU
- feature = some_cpu_only_op(data)
+ W1 = fluid.tensor(size=[100, 20], parameter=True)
+ W2 = fluid.tensor(size=[20, 15], parameter=True)

- gpu_places = get_place(use_gpu=True)
+ data = layers.data()
+
+ gpu_places = layers.get_place(use_gpu=True)
# parallel processing on multiple GPUs
pd = ParallelDo(gpu_places)
- with pd.do():
-     read_input(feature)
-     prediction = my_net(feature)
+ with pd.do(input=data):
+     prediction = softmax(fc(fc(data, W1), W2))
      write_output(prediction)
prediction = pd()
loss = cross_entropy(prediction, label)
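The new example above builds a small two-layer network and runs it under `ParallelDo`. As a rough illustration of the data-parallel pattern it expresses (scatter the mini-batch over the places, run the same sub-net on each shard, gather the per-place outputs), here is a minimal NumPy sketch; `fc`, `softmax`, and `parallel_do` below are simplified stand-ins written for this note, not the Fluid operators.

```python
import numpy as np

def fc(x, w):
    # simplified fully connected layer: no bias, no activation
    return x.dot(w)

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def parallel_do(data, weights, num_places):
    """Scatter the batch, run the same sub-net per shard, gather the outputs."""
    w1, w2 = weights
    shards = np.array_split(data, num_places)              # one shard per place
    outputs = [softmax(fc(fc(shard, w1), w2)) for shard in shards]
    return np.concatenate(outputs)                         # merged prediction

W1 = np.random.randn(100, 20)
W2 = np.random.randn(20, 15)
data = np.random.randn(64, 100)
prediction = parallel_do(data, (W1, W2), num_places=4)     # shape (64, 15)
```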
@@ -66,20 +67,20 @@ start_program
main_program
{
block0 {
- vars: data, places, w1, w2
+ vars: data, places, w1, w2, w1_grad, w2_grad
  ops: data, get_place, parallel_do(block1),
       parallel_do_grad(block2),
       sgd(w2, w2_grad),
       sgd(w1, w1_grad)
}
- block1 {
+ block1 { # the forward pass
  parent_block: 0
  vars: data, h1, h2, loss
  ops: fc, fc, softmax
}
- block2 {
+ block2 { # the backward pass
  parent_block: 1
- vars: data_grad, h1_grad, h2_grad, loss_gard, w1_grad, w2_grad
+ vars: data_grad, h1_grad, h2_grad, loss_grad, local_w1_grad, local_w2_grad
  ops: softmax_grad,
       fc_grad
       fc_grad
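In the compiled program above, each place runs `block2` and produces its own `local_w1_grad`/`local_w2_grad`, while `block0` holds the `w1_grad`/`w2_grad` consumed by the `sgd` ops. The sketch below illustrates one plausible reading of that hand-off: the local gradients are merged by summation (an AllReduce in a multi-device run) and then applied by SGD. The merge rule and all names here are assumptions for illustration, not the Fluid implementation.

```python
import numpy as np

def parallel_do_grad(local_grads):
    # assumed merge rule: sum the per-place local gradients into global ones
    w1_grad = sum(g[0] for g in local_grads)
    w2_grad = sum(g[1] for g in local_grads)
    return w1_grad, w2_grad

def sgd(w, w_grad, lr=0.01):
    # stands in for the sgd(w, w_grad) ops listed in block0
    return w - lr * w_grad

# four places, each holding local gradients shaped like W1 (100x20) and W2 (20x15)
local_grads = [(np.random.randn(100, 20), np.random.randn(20, 15)) for _ in range(4)]
w1_grad, w2_grad = parallel_do_grad(local_grads)
W1 = sgd(np.random.randn(100, 20), w1_grad)
W2 = sgd(np.random.randn(20, 15), w2_grad)
```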