
Commit a7188d5

fix executor transfer cache bug (#14518)

1 parent c1bf966

7 files changed: +143 -33 lines

paddle/fluid/framework/CMakeLists.txt

Lines changed: 7 additions & 1 deletion

@@ -116,8 +116,14 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
 
+if (NOT WIN32)
+cc_library(transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto)
+cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
+    shape_inference data_transform lod_tensor profiler transfer_scope_cache)
+else()
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
-    shape_inference data_transform lod_tensor profiler)
+    shape_inference data_transform lod_tensor)
+endif(NOT WIN32)
 
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)

paddle/fluid/framework/executor.cc

Lines changed: 1 addition & 0 deletions

@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
+#include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/operators/detail/macros.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"

paddle/fluid/framework/naive_executor.cc

Lines changed: 1 addition & 0 deletions

@@ -83,6 +83,7 @@ void NaiveExecutor::Run() {
   for (auto &op : ops_) {
     VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
             << " on scope " << scope_;
+    op->SetIsCalledByExecutor(false);
     op->Run(*scope_, place_);
   }
 }
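NaiveExecutor is the inference driver, so it flips the flag off before each op runs; the regular Executor and ParallelExecutor leave it at its default of true (see operator.h below). A minimal sketch of the intended call pattern; the RunForInference wrapper is hypothetical, only SetIsCalledByExecutor and Run come from this commit:

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace framework {

// Hypothetical helper mirroring the NaiveExecutor::Run loop: mark the op
// as not executor-driven so that TryTransferData consults the transfer
// scope cache instead of allocating a fresh scope for every batch.
void RunForInference(OperatorBase* op, const Scope& scope,
                     const platform::Place& place) {
  op->SetIsCalledByExecutor(false);  // opt into the cached-scope path
  op->Run(scope, place);
}

}  // namespace framework
}  // namespace paddle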

paddle/fluid/framework/operator.cc

Lines changed: 17 additions & 32 deletions

@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
+#include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -33,11 +34,6 @@ DEFINE_bool(check_nan_inf, false,
 namespace paddle {
 namespace framework {
 
-// Combine two hash values to a single hash.
-inline size_t CombineHash(size_t seed, size_t a) {
-  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-}
-
 std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
     std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
     std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
@@ -797,17 +793,6 @@ void OperatorWithKernel::TransferInplaceVarsBack(
 Scope* OperatorWithKernel::TryTransferData(
     const Scope& scope, const OpKernelType& expected_kernel_key,
     std::vector<std::string>* transfered_inplace_vars) const {
-  // In the inference scenerio, the scopes will be reused across the batches, so
-  // the `new_scope` here will result in GPU memroy explosion over the running of
-  // operators.
-  // We use a thread_local cache to fix that issue, the key in the cache is the
-  // combination of the `scope` argument, from_kernel_type, target_kernel_type.
-  // Have a discussion with @Superjomn or the inference developers if some changes
-  // on this logic for this macro might not tested on the other scenerios.
-#ifdef PADDLE_ON_INFERENCE
-  thread_local std::unordered_map<size_t, Scope*> infer_transfer_scope_cache;
-#endif
-
   Scope* new_scope = nullptr;
   for (auto& var_name_item : Inputs()) {
     for (auto& var_name : var_name_item.second) {
@@ -838,23 +823,23 @@ Scope* OperatorWithKernel::TryTransferData(
       VLOG(30) << "Transform Variable " << var_name << " from "
                << kernel_type_for_var << " to " << expected_kernel_key;
 
-#ifdef PADDLE_ON_INFERENCE
-      size_t infer_cache_key =
-          CombineHash(OpKernelType::Hash()(kernel_type_for_var),
-                      OpKernelType::Hash()(expected_kernel_key));
-      infer_cache_key =
-          CombineHash(infer_cache_key, std::hash<const Scope*>()(&scope));
-
-      auto it = infer_transfer_scope_cache.find(infer_cache_key);
-      if (it != infer_transfer_scope_cache.end()) {
-        new_scope = infer_transfer_scope_cache[infer_cache_key];
-      } else {
-        new_scope = &scope.NewScope();
-        infer_transfer_scope_cache[infer_cache_key] = new_scope;
+      // In the inference scenario, the scopes will be reused across
+      // batches, so the `new_scope` here would cause GPU memory to grow
+      // without bound over the run of the operators.
+      // We use a thread_local cache to fix that issue: the key in the
+      // cache is the combination of the `scope` argument, from_kernel_type,
+      // and target_kernel_type.
+      // Discuss with @Superjomn or the inference developers if changes
+      // to this logic might leave the other scenarios untested.
+      // If this op is not called by an Executor or ParallelExecutor, it
+      // is called by a NaiveExecutor. The NaiveExecutor caches the scopes
+      // and variables itself, which is very different behavior from the
+      // other executors.
+      if (!run_by_executor_) {
+        new_scope = TryCreateTransferScope(kernel_type_for_var,
+                                           expected_kernel_key, &scope);
       }
-#endif
-
-      if (new_scope == nullptr) {
+      if (!new_scope) {
         new_scope = &scope.NewScope();
       }
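The "combination" the comment describes is made concrete by TryCreateTransferScope, shown in full in transfer_scope_cache.cc below; in essence the key is built like this:

// Key construction (as implemented in transfer_scope_cache.cc): fold
// the source and target kernel-type hashes together, then mix in the
// parent scope's address, so each (scope, from, to) triple maps to
// exactly one cached transfer scope per thread.
size_t key = CombineHash(OpKernelType::Hash()(kernel_type_for_var),
                         OpKernelType::Hash()(expected_kernel_key));
key = CombineHash(key, std::hash<const Scope*>()(&scope));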

paddle/fluid/framework/operator.h

Lines changed: 4 additions & 0 deletions

@@ -127,6 +127,8 @@ class OperatorBase {
   //! Get all outputs variable names
   virtual std::vector<std::string> OutputVars(bool has_intermediate) const;
 
+  void SetIsCalledByExecutor(bool x) { run_by_executor_ = x; }
+
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:
@@ -139,6 +141,8 @@ class OperatorBase {
   // IG (Inputs Gradients)
   VariableNameMap outputs_;
   AttributeMap attrs_;
+  // Whether this operator executes in an Executor.
+  bool run_by_executor_{true};
 
  private:
   void GenerateTemporaryNames();
paddle/fluid/framework/transfer_scope_cache.cc

Lines changed: 72 additions & 0 deletions (new file)

@@ -0,0 +1,72 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/transfer_scope_cache.h"
+
+namespace paddle {
+namespace framework {
+
+std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
+  thread_local auto* x = new std::unordered_map<size_t, Scope*>;
+  return *x;
+}
+
+std::unordered_set<Scope*>& global_transfer_scope_cache() {
+  thread_local auto* x = new std::unordered_set<Scope*>;
+  return *x;
+}
+
+Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
+                              const Scope* scope) {
+  Scope* new_scope{nullptr};
+  size_t infer_cache_key =
+      CombineHash(OpKernelType::Hash()(type0), OpKernelType::Hash()(type1));
+  infer_cache_key =
+      CombineHash(infer_cache_key, std::hash<const Scope*>()(scope));
+
+  auto it = global_transfer_data_cache().find(infer_cache_key);
+  if (it != global_transfer_data_cache().end()) {
+    new_scope = global_transfer_data_cache()[infer_cache_key];
+  } else {
+    new_scope = &scope->NewScope();
+    global_transfer_data_cache()[infer_cache_key] = new_scope;
+  }
+  global_transfer_scope_cache().insert(new_scope);
+  return new_scope;
+}
+
+void RemoveKidsFromTransferScopeCache(Scope* scope) {
+  auto it = global_transfer_scope_cache().find(scope);
+  if (it != global_transfer_scope_cache().end()) {
+    global_transfer_scope_cache().erase(it);
+  }
+  for (auto* s : scope->kids()) {
+    auto it = global_transfer_scope_cache().find(s);
+    if (it != global_transfer_scope_cache().end()) {
+      global_transfer_scope_cache().erase(it);
+    }
+  }
+
+  // Remove entries that map to this scope from the transfer data cache.
+  auto& cache = global_transfer_data_cache();
+  for (auto it = cache.begin(); it != cache.end();) {
+    if (it->second == scope)
+      it = cache.erase(it);
+    else
+      it++;
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle
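A brief usage sketch of the per-thread cache semantics; the Demo wrapper is hypothetical and assumes a parent scope plus two kernel types are already in hand:

#include <cassert>
#include "paddle/fluid/framework/transfer_scope_cache.h"

namespace paddle {
namespace framework {

// Hypothetical illustration of the cache behavior added by this commit.
void Demo(const Scope& parent, const OpKernelType& from,
          const OpKernelType& to) {
  // First call with this (from, to, &parent) key creates a child scope
  // of `parent` and records it in the thread-local caches.
  Scope* s1 = TryCreateTransferScope(from, to, &parent);
  // A second call with the same key on the same thread is a cache hit,
  // so repeated inference batches reuse one transfer scope instead of
  // allocating a new one per batch (the memory bug this commit fixes).
  Scope* s2 = TryCreateTransferScope(from, to, &parent);
  assert(s1 == s2);  // same cached transfer scope
}

}  // namespace framework
}  // namespace paddle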
paddle/fluid/framework/transfer_scope_cache.h

Lines changed: 41 additions & 0 deletions (new file)

@@ -0,0 +1,41 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <thread>  // NOLINT
+#include <unordered_map>
+#include <unordered_set>
+#include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+std::unordered_map<size_t, Scope*>& global_transfer_data_cache();
+
+std::unordered_set<Scope*>& global_transfer_scope_cache();
+
+// Combine two hash values to a single hash.
+static size_t CombineHash(size_t seed, size_t a) {
+  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
+Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
+                              const Scope* scope);
+
+void RemoveKidsFromTransferScopeCache(Scope* scope);
+
+}  // namespace framework
+}  // namespace paddle
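CombineHash follows the familiar hash_combine mixing recipe (the same constants Boost uses). A standalone sanity check, not part of the commit, showing that operand order matters, so (from, to) and (to, from) kernel-type pairs produce distinct cache keys:

#include <cassert>
#include <cstddef>

// Mixer copied from transfer_scope_cache.h above.
static size_t CombineHash(size_t seed, size_t a) {
  return (seed ^ a) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

int main() {
  // Not commutative: swapping the operands changes the result.
  assert(CombineHash(1, 2) != CombineHash(2, 1));
  // Chaining folds in each new value, as TryCreateTransferScope does
  // with the two kernel-type hashes and then the scope address.
  size_t key = CombineHash(CombineHash(0x1234, 0x5678), 0x9abc);
  (void)key;
  return 0;
}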
