Skip to content

Commit cda7842

Browse files
committed
Revert "Revert "Add Python Callstacks when Op::Run error (PaddlePaddle#12759)""
This reverts commit 1f27027.
1 parent 9557cc2 commit cda7842

File tree

7 files changed

+61
-17
lines changed

7 files changed

+61
-17
lines changed

paddle/fluid/framework/op_proto_maker.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
131131

132132
AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
133133
.SetDefault("");
134-
134+
AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
135+
"Callstack for Op Creatation.")
136+
.SetDefault({});
135137
Validate();
136138
}
137139

paddle/fluid/framework/op_proto_maker.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class OpProtoAndCheckerMaker {
4040
static const char *OpRoleAttrName() { return "op_role"; }
4141
static const char *OpRoleVarAttrName() { return "op_role_var"; }
4242
static const char *OpNamescopeAttrName() { return "op_namescope"; }
43+
static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
4344

4445
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
4546

paddle/fluid/framework/operator.cc

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
1111
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
14-
#include <gflags/gflags.h>
15-
#include <glog/logging.h>
16-
14+
#include "paddle/fluid/framework/operator.h"
1715
#include <algorithm>
18-
16+
#include <sstream>
17+
#include <string>
18+
#include <vector>
19+
#include "gflags/gflags.h"
20+
#include "glog/logging.h"
1921
#include "paddle/fluid/framework/data_transform.h"
2022
#include "paddle/fluid/framework/executor.h"
2123
#include "paddle/fluid/framework/lod_tensor.h"
22-
#include "paddle/fluid/framework/operator.h"
24+
#include "paddle/fluid/framework/op_proto_maker.h"
2325
#include "paddle/fluid/framework/shape_inference.h"
2426
#include "paddle/fluid/framework/var_type.h"
2527
#include "paddle/fluid/platform/profiler.h"
@@ -137,19 +139,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
137139
}
138140

139141
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
140-
VLOG(4) << place << " " << DebugStringEx(&scope);
141-
if (platform::is_gpu_place(place)) {
142+
try {
143+
if (VLOG_IS_ON(4)) {
144+
VLOG(4) << place << " " << DebugStringEx(&scope);
145+
}
146+
if (platform::is_gpu_place(place)) {
142147
#ifndef PADDLE_WITH_CUDA
143-
PADDLE_THROW("Cannot run operator on place %s", place);
148+
PADDLE_THROW("Cannot run operator on place %s", place);
144149
#else
145-
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
146-
platform::SetDeviceId(dev_id);
150+
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
151+
platform::SetDeviceId(dev_id);
147152
#endif
153+
}
154+
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
155+
platform::RecordEvent record_event(Type(), pool.Get(place));
156+
RunImpl(scope, place);
157+
if (VLOG_IS_ON(3)) {
158+
VLOG(3) << place << " " << DebugStringEx(&scope);
159+
}
160+
} catch (platform::EnforceNotMet exception) {
161+
if (Attrs().count("sub_block") != 0) {
162+
throw exception;
163+
}
164+
165+
auto& callstack = Attr<std::vector<std::string>>(
166+
OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
167+
168+
if (callstack.empty()) {
169+
throw exception;
170+
}
171+
std::ostringstream sout;
172+
sout << "Invoke operator " << Type() << " error.\n";
173+
sout << "Python Callstacks: \n";
174+
for (auto& line : callstack) {
175+
sout << line;
176+
}
177+
sout << "C++ Callstacks: \n";
178+
sout << exception.err_str_;
179+
exception.err_str_ = sout.str();
180+
throw exception;
181+
} catch (...) {
182+
std::rethrow_exception(std::current_exception());
148183
}
149-
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
150-
platform::RecordEvent record_event(Type(), pool.Get(place));
151-
RunImpl(scope, place);
152-
VLOG(3) << place << " " << DebugStringEx(&scope);
153184
}
154185

155186
bool OperatorBase::HasInputs(const std::string& name) const {
@@ -177,7 +208,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
177208
}
178209

179210
bool OperatorBase::HasOutputs(const std::string& name) const {
180-
if (outputs_.find(name) != outputs_.end()) {
211+
if (outputs_.end() != outputs_.find(name)) {
181212
return true;
182213
} else {
183214
return false;

paddle/fluid/operators/top_k_op.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
3030
"Output(Indices) of TopkOp should not be null.");
3131

3232
auto input_dims = ctx->GetInputDim("X");
33+
PADDLE_ENFORCE_EQ(input_dims.size(), 2,
34+
"Rank of TopK op's input must be 2.");
3335
const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
3436

3537
PADDLE_ENFORCE_GE(k, 1, "k must >= 1");

paddle/fluid/pybind/const_value.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ void BindConstValue(pybind11::module* m) {
4646
op_proto_and_checker_maker.def(
4747
"kOpNameScopeAttrName",
4848
framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
49+
op_proto_and_checker_maker.def(
50+
"kOpCreationCallstackAttrName",
51+
framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
4952
}
5053

5154
} // namespace pybind

python/paddle/fluid/framework.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import contextlib
1919
import re
2020
import six
21+
import traceback
2122

2223
import numpy as np
2324

@@ -572,6 +573,10 @@ def __init__(self,
572573
if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
573574
del op_attrs[role_var_name]
574575

576+
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
577+
op_attrs[callstack_var_name] = list(
578+
reversed(traceback.format_stack()))[1:]
579+
575580
if len(self.desc.type()) != 0:
576581
return
577582
if type is None:

python/paddle/fluid/tests/unittests/test_operator_desc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def test_op_desc_creation(self):
6969
set(mul_op.attr_names),
7070
set([
7171
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
72-
"op_namescope"
72+
"op_namescope", "op_callstack"
7373
]))
7474
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
7575
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)

0 commit comments

Comments
 (0)