
Commit 4e91d8d

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into graph_quantization
test=develop
2 parents: 3b668c1 + b3fdf70

File tree

3 files changed: +26 −27 lines

paddle/fluid/operators/distributed/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ else()
         variable_response.cc
         collective_client.cc collective_server.cc
         ${BRPC_SRCS}
-        PROTO ${CMAKE_CURRENT_BINARY_DIR}/send_recv.proto
+        PROTO send_recv.proto
         DEPS lod_tensor selected_rows memory)
 
 set(RPC_DEPS sendrecvop_rpc brpc ssl crypto protobuf leveldb snappystream snappy zlib)

paddle/fluid/operators/group_norm_op.cu

Lines changed: 8 additions & 8 deletions
@@ -21,20 +21,20 @@ namespace operators {
 
 enum GroupNormKernelFlags { kHasScale = 1, kHasBias = 2 };
 
-#define CHECK_CASE(i, flags, kernel_name, args...)                          \
-  if (i == flags) {                                                         \
-    kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(args);        \
+#define CHECK_CASE(i, flags, kernel_name, ...)                              \
+  if (i == flags) {                                                         \
+    kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(__VA_ARGS__); \
   }
 
 // 0 for no scale, no bias
 // 1 for has scale, no bias
 // 2 for no scale, has bias
 // 3 for has scale, has bias
-#define UNROLL_ALL_CASES(flags, kernel_name, args...) \
-  CHECK_CASE(0, flags, kernel_name, args)             \
-  CHECK_CASE(1, flags, kernel_name, args)             \
-  CHECK_CASE(2, flags, kernel_name, args)             \
-  CHECK_CASE(3, flags, kernel_name, args)
+#define UNROLL_ALL_CASES(flags, kernel_name, ...)     \
+  CHECK_CASE(0, flags, kernel_name, __VA_ARGS__)      \
+  CHECK_CASE(1, flags, kernel_name, __VA_ARGS__)      \
+  CHECK_CASE(2, flags, kernel_name, __VA_ARGS__)      \
+  CHECK_CASE(3, flags, kernel_name, __VA_ARGS__)
 
 template <typename T>
 __device__ __inline__ void CudaAtomicAddWithWarp(T* sum, T value) {
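
Note: this hunk swaps GCC's named variadic-macro extension (`args...`) for the standard C++11 form (`...` forwarded via `__VA_ARGS__`), so the macros no longer depend on a compiler extension. A minimal standalone sketch of the same pattern (the LOG_IF macro below is illustrative, not from the Paddle source):

#include <cstdio>

// Pre-change style (GNU extension): the variadic arguments are named.
//   #define LOG_IF(cond, fmt, args...) \
//     if (cond) { std::printf(fmt, args); }

// Post-change style (standard C++11): anonymous "..." plus __VA_ARGS__.
#define LOG_IF(cond, fmt, ...)     \
  if (cond) {                      \
    std::printf(fmt, __VA_ARGS__); \
  }

int main() {
  // Expands to: if (1 < 2) { std::printf("%s = %d\n", "x", 42); }
  LOG_IF(1 < 2, "%s = %d\n", "x", 42);
  return 0;
}

Both forms expand identically under GCC and Clang; only the `__VA_ARGS__` form is accepted by MSVC and other strictly conforming compilers, which is presumably the motivation for the change.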

python/paddle/fluid/executor.py

Lines changed: 17 additions & 18 deletions
@@ -305,7 +305,9 @@ class Executor(object):
     def __init__(self, place):
         self.place = place
         self.program_caches = dict()
-        self.executor = None
+        p = core.Place()
+        p.set_place(self.place)
+        self._default_executor = core.Executor(p)
         self._closed = False
 
     def _get_program_cache(self, program_cache_key):
@@ -397,12 +399,13 @@ def close(self):
             >>> ...
             >>> exe.close()
         """
-        if not self._closed and self.executor:
-            self.executor.close()
+        if not self._closed:
+            self._default_executor.close()
         self._closed = True
 
     def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name,
                       return_numpy):
+        exe = program._executor
         if isinstance(feed, dict):
             feed_tensor_dict = dict()
             for feed_name in feed:
@@ -414,8 +417,7 @@ def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name,
                 feed_tensor.set(feed[feed_name], core.CPUPlace())
                 feed_tensor_dict[feed_name] = feed_tensor
 
-            self.executor.feed_and_split_tensor_into_local_scopes(
-                feed_tensor_dict)
+            exe.feed_and_split_tensor_into_local_scopes(feed_tensor_dict)
         elif isinstance(feed, list) or isinstance(feed, tuple):
             if len(feed) != len(program._places):
                 raise ValueError(
@@ -436,10 +438,10 @@ def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name,
                     tensor = tmp
                 res_dict[feed_name] = tensor
             res.append(res_dict)
-            self.executor.feed_tensors_into_local_scopes(res)
+            exe.feed_tensors_into_local_scopes(res)
 
         fetch_var_names = list(map(_to_name_str, fetch_list))
-        self.executor.run(fetch_var_names, fetch_var_name)
+        exe.run(fetch_var_names, fetch_var_name)
         arr = scope.find_var(fetch_var_name).get_lod_tensor_array()
 
         if return_numpy:
@@ -511,12 +513,9 @@ def run(self,
         compiled = isinstance(program, compiler.CompiledProgram)
         # For backward compatibility, run directly.
         if not compiled:
-            if not self.executor:
-                p = core.Place()
-                p.set_place(self.place)
-                self.executor = core.Executor(p)
             return self._run(
                 program,
+                self._default_executor,
                 feed=feed,
                 fetch_list=fetch_list,
                 feed_var_name=feed_var_name,
@@ -526,7 +525,6 @@ def run(self,
                 use_program_cache=use_program_cache)
 
         program._compile(scope, self.place)
-        self.executor = program._executor
         if program._is_data_parallel:
             return self._run_parallel(
                 program,
@@ -536,12 +534,13 @@ def run(self,
                 fetch_var_name=fetch_var_name,
                 return_numpy=return_numpy)
         elif program._is_inference:
-            return self._run_inference(program, feed)
+            return self._run_inference(program._executor, feed)
         else:
             # TODO(panyx0718): Can compile program to optimize executor
             # performance.
             return self._run(
                 program._program,
+                self._default_executor,
                 feed=feed,
                 fetch_list=fetch_list,
                 feed_var_name=feed_var_name,
@@ -550,8 +549,8 @@ def run(self,
                 return_numpy=return_numpy,
                 use_program_cache=use_program_cache)
 
-    def _run(self, program, feed, fetch_list, feed_var_name, fetch_var_name,
-             scope, return_numpy, use_program_cache):
+    def _run(self, program, exe, feed, fetch_list, feed_var_name,
+             fetch_var_name, scope, return_numpy, use_program_cache):
 
         if feed is None:
             feed = {}
@@ -589,11 +588,11 @@ def _run(self, program, feed, fetch_list, feed_var_name, fetch_var_name,
             fetch_var_name=fetch_var_name)
 
         self._feed_data(program, feed, feed_var_name, scope)
-        self.executor.run(program.desc, scope, 0, True, True)
+        exe.run(program.desc, scope, 0, True, True)
         outs = self._fetch_data(fetch_list, fetch_var_name, scope)
         if return_numpy:
             outs = as_numpy(outs)
         return outs
 
-    def _run_inference(self, program, feed):
-        return self.executor.run(feed)
+    def _run_inference(self, exe, feed):
+        return exe.run(feed)
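
Note: taken together, these hunks change two things in Executor. The underlying core.Executor is now built eagerly in __init__ (as self._default_executor) instead of lazily on the first uncompiled run() call, and the executor to use is passed explicitly into _run/_run_inference rather than read from the mutable self.executor attribute, so running a CompiledProgram no longer overwrites the executor used for plain programs. A minimal, self-contained sketch of the same pattern (the Runner and _Backend names are illustrative, not the Paddle API):

class _Backend(object):
    """Stand-in for core.Executor in this sketch; illustrative only."""

    def __init__(self, place):
        self.place = place
        self.closed = False

    def execute(self, program):
        return "ran %r on %s" % (program, self.place)

    def close(self):
        self.closed = True


class Runner(object):
    """Sketch of the eager-init, explicit-executor pattern in this diff."""

    def __init__(self, place):
        # Eager construction (cf. self._default_executor): the default
        # backend always exists, so close() needs no None guard.
        self._default_backend = _Backend(place)
        self._closed = False

    def close(self):
        if not self._closed:
            self._default_backend.close()
        self._closed = True

    def run(self, program, backend=None):
        # Pass the backend explicitly instead of mutating shared state,
        # so a compiled program's own backend never clobbers the default.
        return self._run(program, backend or self._default_backend)

    def _run(self, program, backend):
        return backend.execute(program)


runner = Runner("CPUPlace")
print(runner.run("my_program"))  # uses the default backend
runner.close()

Threading the backend through as a parameter keeps run() re-entrant across compiled and uncompiled programs, and eager construction lets close() drop its None check.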
