Skip to content

Commit 21bb9e9

Browse files
authored
Merge pull request PaddlePaddle#13201 from reyoung/revert_callstack
Revert "Revert "Add Python Callstacks when Op::Run error (PaddlePaddle#12759)""
2 parents 3fa68dc + 28cc1e1 commit 21bb9e9

File tree

8 files changed

+65
-17
lines changed

8 files changed

+65
-17
lines changed

paddle/fluid/framework/op_proto_maker.cc

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
132132

133133
AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
134134
.SetDefault("");
135-
135+
AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
136+
"Callstack for Op Creatation.")
137+
.SetDefault({});
136138
Validate();
137139
}
138140

paddle/fluid/framework/op_proto_maker.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker {
4646
static const char *OpRoleAttrName() { return "op_role"; }
4747
static const char *OpRoleVarAttrName() { return "op_role_var"; }
4848
static const char *OpNamescopeAttrName() { return "op_namescope"; }
49+
static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
4950

5051
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
5152

paddle/fluid/framework/operator.cc

Lines changed: 44 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,17 @@ limitations under the License. */
1414
#define GLOG_NO_ABBREVIATED_SEVERITIES
1515
#define GOOGLE_GLOG_DLL_DECL
1616

17+
#include "paddle/fluid/framework/operator.h"
1718
#include <gflags/gflags.h>
1819
#include <glog/logging.h>
19-
2020
#include <algorithm>
21-
21+
#include <sstream>
22+
#include <string>
23+
#include <vector>
2224
#include "paddle/fluid/framework/data_transform.h"
2325
#include "paddle/fluid/framework/executor.h"
2426
#include "paddle/fluid/framework/lod_tensor.h"
25-
#include "paddle/fluid/framework/operator.h"
27+
#include "paddle/fluid/framework/op_proto_maker.h"
2628
#include "paddle/fluid/framework/shape_inference.h"
2729
#include "paddle/fluid/framework/var_type.h"
2830
#include "paddle/fluid/platform/profiler.h"
@@ -140,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
140142
}
141143

142144
// Runs this operator on `place` using variables from `scope`.
//
// This hunk is shown as an interleaved diff: lines following an "N-" marker
// are the removed (old) body, lines following an "N+" marker are the added
// (new) body. The new body wraps the old logic in a try/catch so that an
// EnforceNotMet error message can be prefixed with the Python callstack
// stored in the op's "op_callstack" attribute.
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
143-
VLOG(4) << place << " " << DebugStringEx(&scope);
144-
if (platform::is_gpu_place(place)) {
145+
try {
146+
// Guarded so DebugStringEx() is only evaluated when verbosity 4 is on.
if (VLOG_IS_ON(4)) {
147+
VLOG(4) << place << " " << DebugStringEx(&scope);
148+
}
149+
if (platform::is_gpu_place(place)) {
145150
// Without CUDA support a GPU place is rejected; with it, the device id
// carried by the CUDAPlace is made current before running.
#ifndef PADDLE_WITH_CUDA
146-
PADDLE_THROW("Cannot run operator on place %s", place);
151+
PADDLE_THROW("Cannot run operator on place %s", place);
147152
#else
148-
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
149-
platform::SetDeviceId(dev_id);
153+
auto dev_id = boost::get<platform::CUDAPlace>(place).device;
154+
platform::SetDeviceId(dev_id);
150155
#endif
156+
}
157+
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
158+
// record_event is constructed with the op type and the device context for
// `place`, and stays alive across RunImpl until the try block ends.
platform::RecordEvent record_event(Type(), pool.Get(place));
159+
RunImpl(scope, place);
160+
if (VLOG_IS_ON(3)) {
161+
VLOG(3) << place << " " << DebugStringEx(&scope);
162+
}
163+
// NOTE(review): the exception is caught BY VALUE and every rethrow below
// uses `throw exception;`, which throws a copy of the local object. If a
// type derived from EnforceNotMet was thrown, that copy is sliced to
// EnforceNotMet. Catching by non-const reference and rethrowing with a
// bare `throw;` would keep the original object -- confirm and fix upstream.
} catch (platform::EnforceNotMet exception) {
164+
// Ops that carry a "sub_block" attribute rethrow without decorating the
// message (presumably the failing inner op already did -- TODO confirm).
if (Attrs().count("sub_block") != 0) {
165+
throw exception;
166+
}
167+
168+
auto& callstack = Attr<std::vector<std::string>>(
169+
OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
170+
171+
// No recorded Python callstack: nothing to add, rethrow unchanged.
if (callstack.empty()) {
172+
throw exception;
173+
}
174+
// Build the new message: op type, the Python callstack entries, then the
// original error text under a "C++ Callstacks" heading.
std::ostringstream sout;
175+
sout << "Invoke operator " << Type() << " error.\n";
176+
sout << "Python Callstacks: \n";
177+
// Entries are written back-to-back with no separator; assumes each entry
// already ends with a newline -- TODO confirm against the Python side.
for (auto& line : callstack) {
178+
sout << line;
179+
}
180+
sout << "C++ Callstacks: \n";
181+
sout << exception.err_str_;
182+
exception.err_str_ = sout.str();
183+
throw exception;
184+
// NOTE(review): this handler only rethrows the in-flight exception; a bare
// `throw;` (or omitting the handler) expresses the same intent more simply.
} catch (...) {
185+
std::rethrow_exception(std::current_exception());
151186
}
152-
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
153-
platform::RecordEvent record_event(Type(), pool.Get(place));
154-
RunImpl(scope, place);
155-
VLOG(3) << place << " " << DebugStringEx(&scope);
156187
}
157188

158189
bool OperatorBase::HasInputs(const std::string& name) const {
@@ -180,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
180211
}
181212

182213
bool OperatorBase::HasOutputs(const std::string& name) const {
183-
if (outputs_.find(name) != outputs_.end()) {
214+
if (outputs_.end() != outputs_.find(name)) {
184215
return true;
185216
} else {
186217
return false;

paddle/fluid/operators/tensorrt_engine_op.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ namespace operators {
3636
using FluidDT = framework::proto::VarType_Type;
3737
using TRT_DT = nvinfer1::DataType;
3838

39-
namespace {
39+
namespace { // NOLINT
4040

4141
TRT_DT FluidDataType2TRT(FluidDT type) {
4242
switch (type) {

paddle/fluid/operators/top_k_op.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
3030
"Output(Indices) of TopkOp should not be null.");
3131

3232
auto input_dims = ctx->GetInputDim("X");
33+
PADDLE_ENFORCE_EQ(input_dims.size(), 2,
34+
"Rank of TopK op's input must be 2.");
3335
const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
3436

3537
PADDLE_ENFORCE_GE(k, 1, "k must >= 1");

paddle/fluid/pybind/const_value.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) {
4848
op_proto_and_checker_maker.def(
4949
"kOpNameScopeAttrName",
5050
framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
51+
op_proto_and_checker_maker.def(
52+
"kOpCreationCallstackAttrName",
53+
framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
5154
}
5255

5356
} // namespace pybind

python/paddle/fluid/framework.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import contextlib
1919
import re
2020
import six
21+
import traceback
2122

2223
import numpy as np
2324

@@ -34,6 +35,8 @@
3435
except Exception as e:
3536
raise e
3637
from . import unique_name
38+
import os
39+
PADDLE_ON_MODEL_CE = os.environ.get('PADDLE_ON_MODEL_CE', None) is not None
3740

3841
__all__ = [
3942
'Program',
@@ -489,7 +492,8 @@ def generated_op_attr_names():
489492
return {
490493
core.op_proto_and_checker_maker.kOpRoleAttrName(),
491494
core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
492-
core.op_proto_and_checker_maker.kOpNameScopeAttrName()
495+
core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
496+
core.op_proto_and_checker_maker.kOpCreationCallstackAttrName()
493497
}
494498

495499

@@ -572,6 +576,11 @@ def __init__(self,
572576
if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
573577
del op_attrs[role_var_name]
574578

579+
if not PADDLE_ON_MODEL_CE:
580+
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
581+
op_attrs[callstack_var_name] = list(
582+
reversed(traceback.format_stack()))[1:]
583+
575584
if len(self.desc.type()) != 0:
576585
return
577586
if type is None:

python/paddle/fluid/tests/unittests/test_operator_desc.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def test_op_desc_creation(self):
6969
set(mul_op.attr_names),
7070
set([
7171
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
72-
"op_namescope"
72+
"op_namescope", "op_callstack"
7373
]))
7474
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
7575
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)

0 commit comments

Comments
 (0)