
Commit a57e8a4: add cpu test
Parent: 1e731f5

5 files changed: +20 -14 lines

paddle/fluid/framework/details/all_reduce_op_handle.cc
Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ void AllReduceOpHandle::RunImpl() {
 
   if (platform::is_gpu_place(lod_tensors[0]->place())) {
 #ifdef PADDLE_WITH_CUDA
-    PADDLE_ENFORCE(nccl_ctxs_);
+    PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
     int dtype = -1;
     size_t numel = 0;
     std::vector<std::function<void()>> all_reduce_calls;

python/paddle/fluid/parallel_executor.py
Lines changed: 3 additions & 4 deletions

@@ -119,11 +119,10 @@ def __init__(self,
         if use_cuda:
             # Experiments on se-resnext shows that too many threads hurt
             # performance. Worth tunning for other models in the future.
-            exec_strategy.num_threads = len(self._places) * 2
+            exec_strategy.num_threads = len(self._places) * 4
         else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            exec_strategy.num_threads = min(len(self._places) * 2, cpu_num)
+            # Currently num_threads must be 1.
+            exec_strategy.num_threads = 1
 
         if build_strategy is None:
             build_strategy = BuildStrategy()
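In short: on GPU the thread pool grows to four threads per place, while the CPU path is pinned to a single thread and parallelism instead comes from the number of CPUPlaces, which the CPU_NUM environment variable controls. A minimal sketch of driving this CPU path with the fluid API of this era; the toy network and layer sizes are illustrative, not from the commit:

import os
os.environ['CPU_NUM'] = '4'  # number of CPUPlaces ParallelExecutor will use

import paddle.fluid as fluid

# A toy network so loss_name resolves; sizes are arbitrary.
img = fluid.layers.data(name='img', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.mean(
    fluid.layers.cross_entropy(input=prediction, label=label))

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# use_cuda=False takes the CPU branch above: num_threads is pinned to 1
# and the work is spread across the CPU_NUM places instead.
pe = fluid.ParallelExecutor(use_cuda=False, loss_name=loss.name)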

python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
Lines changed: 5 additions & 1 deletion

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import multiprocessing
+import os
 import unittest
 import paddle.fluid as fluid
 import time
@@ -73,7 +75,9 @@ def run_executor(exe, feed, fetch_list, program=None):
             exe = fluid.Executor(place=place)
 
         if batch_size is not None:
-            batch_size *= fluid.core.get_cuda_device_count()
+            batch_size *= fluid.core.get_cuda_device_count(
+            ) if use_cuda else int(
+                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
         begin = time.time()
         first_loss, = run_executor(
             exe=exe, feed=feed_dict, fetch_list=[loss.name])
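The new conditional multiplies the per-device batch size by however many devices are in play: the CUDA device count on GPU, or CPU_NUM (defaulting to the machine's core count) on CPU. Unrolled for readability; use_cuda and per_device_batch_size are stand-in names, not identifiers from the commit:

import multiprocessing
import os

import paddle.fluid as fluid

use_cuda = False              # stand-in for the test's argument
per_device_batch_size = 32    # stand-in for the batch_size argument

if use_cuda:
    device_count = fluid.core.get_cuda_device_count()
else:
    # Same fallback the test uses: CPU_NUM, else the machine's core count.
    device_count = int(
        os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

effective_batch_size = per_device_batch_size * device_count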

python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
Lines changed: 5 additions & 3 deletions

@@ -104,8 +104,9 @@ def setUpClass(cls):
     def check_simple_fc_convergence(self,
                                     balance_parameter_opt_between_cards,
                                     use_cuda=True):
-        self.check_network_convergence(simple_fc_net)
-        self.check_network_convergence(simple_fc_net, allow_op_delay=True)
+        self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
+        self.check_network_convergence(
+            simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
 
         img = np.zeros(shape=[32, 784], dtype='float32')
         label = np.ones(shape=[32, 1], dtype='int64')
@@ -142,6 +143,7 @@ def check_simple_fc_parallel_accuracy(self,
             seed=1000,
             feed_dict={"image": img,
                        "label": label},
+            use_cuda=use_cuda,
             use_parallel_executor=True,
             balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
         )
@@ -161,7 +163,7 @@ def test_simple_fc_parallel_accuracy_with_new_strategy(self):
 
     def check_batchnorm_fc_convergence(
             self, balance_parameter_opt_between_cards, use_cuda):
-        self.check_network_convergence(fc_with_batchnorm)
+        self.check_network_convergence(fc_with_batchnorm, use_cuda=use_cuda)
         img = np.zeros(shape=[32, 784], dtype='float32')
         label = np.ones(shape=[32, 1], dtype='int64')
         self.check_network_convergence(
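With use_cuda threaded through each helper, a single test body can exercise both device types. A hypothetical caller in this style, not taken from the commit; the positional False is the balance_parameter_opt_between_cards flag:

def test_simple_fc(self):
    # The helper forwards use_cuda down to check_network_convergence,
    # so the identical network is checked once per device type.
    self.check_simple_fc_convergence(False, use_cuda=True)
    self.check_simple_fc_convergence(False, use_cuda=False)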

python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
Lines changed: 6 additions & 5 deletions

@@ -133,27 +133,28 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
 class TestResnet(TestParallelExecutorBase):
     def check_resnet_convergence(self,
                                  balance_parameter_opt_between_cards,
-                                 use_cuda=True):
+                                 use_cuda=True,
+                                 iter=20):
         import functools
         batch_size = 2
         self.check_network_convergence(
             functools.partial(
                 SE_ResNeXt50Small, batch_size=batch_size),
-            iter=20,
+            iter=iter,
             batch_size=batch_size,
             use_cuda=use_cuda,
             balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
         )
 
     def test_resnet(self):
-        # os.environ['CPU_NUM'] = str(4)
+        os.environ['CPU_NUM'] = str(4)
         self.check_resnet_convergence(False, use_cuda=True)
-        # self.check_resnet_convergence(False,use_cuda=False)
+        self.check_resnet_convergence(False, use_cuda=False, iter=5)
 
     def test_resnet_with_new_strategy(self):
         os.environ['CPU_NUM'] = str(4)
         self.check_resnet_convergence(True, use_cuda=True)
-        self.check_resnet_convergence(True, use_cuda=False)
+        self.check_resnet_convergence(True, use_cuda=False, iter=5)
 
 
 if __name__ == '__main__':
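The CPU runs pass iter=5 because SE-ResNeXt is far slower on CPU; a handful of iterations still gives a usable convergence signal if the check only compares early and late losses. A hypothetical helper in that spirit, not part of the commit:

def assert_loss_decreases(losses):
    # Even a short run is informative: compare the first and last
    # recorded loss values rather than demanding full convergence.
    assert len(losses) >= 2, "need at least two recorded losses"
    assert losses[-1] < losses[0], "loss did not decrease: %s" % losses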
