@@ -22,14 +22,17 @@
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
+#include "paddle/fluid/platform/profiler/supplement_tracing.h"
 #include "paddle/phi/core/kernel_context.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 
-PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace, true,
+PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace,
+                            true,
                             "Use inplace in new executor");
-PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope, true,
+PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
+                            true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
 
@@ -167,8 +170,8 @@ paddle::framework::FetchList InterpreterCore::Run(
       // scope?
     }
     global_scope_->SetLocalScope(local_scope_);
-    paddle::framework::interpreter::build_variable_scope(block_, global_scope_,
-                                                         create_local_scope_);
+    paddle::framework::interpreter::build_variable_scope(
+        block_, global_scope_, create_local_scope_);
     std::vector<paddle::framework::OpFuncNode> op_func_nodes;
     paddle::framework::interpreter::build_op_func_list(
         place_, block_, &op_func_nodes, global_scope_, create_local_scope_);
@@ -490,7 +493,9 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
     // If it is OperatorBase, InferShape do nothing.
     if (op_with_kernel != nullptr) {
       platform::RecordEvent infershape_event(
-          "infer_shape", platform::TracerEventType::OperatorInner, 1,
+          "infer_shape",
+          platform::TracerEventType::OperatorInner,
+          1,
           platform::EventRole::kInnerOp);
 
       // see OperatorWithKernel::RunImpl in operator.cc for why
@@ -499,6 +504,11 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
         op_with_kernel->Info().infer_shape_(
             instr_node.InnerInferShapeContext().get());
       }
+      infershape_event.End();
+      platform::RecordOpInfoSupplement(op->Type(),
+                                       op->Attrs(),
+                                       *(instr_node.InnerInferShapeContext()),
+                                       *(instr_node.InnerRuntimeContext()));
     }
   }
 
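Aside from the one-argument-per-line reflow, the functional change in the two hunks above is that `infershape_event` is now closed explicitly with `End()` before `platform::RecordOpInfoSupplement(...)` runs, so collecting the supplementary op info (type, attrs, shape/runtime contexts) is not counted inside the `infer_shape` profiling span. Below is a minimal standalone sketch of that pattern; it assumes nothing about Paddle's internals, and `ScopedEvent`, `DoInferShape`, and `RecordSupplement` are hypothetical stand-ins for the real APIs:

```cpp
// Sketch only: a scoped profiling event that normally closes at end of
// scope, plus an explicit End() so follow-up bookkeeping is excluded
// from the measured span. Not Paddle code.
#include <chrono>
#include <cstdio>

class ScopedEvent {
 public:
  explicit ScopedEvent(const char* name)
      : name_(name), start_(std::chrono::steady_clock::now()) {}
  ~ScopedEvent() { End(); }  // fallback: close at end of scope
  void End() {
    if (ended_) return;  // idempotent: the destructor runs End() too
    ended_ = true;
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::printf("%s took %lld us\n", name_, static_cast<long long>(us));
  }

 private:
  const char* name_;
  std::chrono::steady_clock::time_point start_;
  bool ended_ = false;
};

void DoInferShape() { /* stand-in for infer_shape_(...) */ }
void RecordSupplement() { /* stand-in for RecordOpInfoSupplement(...) */ }

int main() {
  ScopedEvent infershape_event("infer_shape");
  DoInferShape();
  infershape_event.End();  // close before the supplement, as in the diff
  RecordSupplement();      // not attributed to the infer_shape span
}
```

Making `End()` idempotent keeps the destructor safe as a fallback, which is what lets call sites that never call `End()` early (like the `compute_event` below) still close at end of scope.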
@@ -516,7 +526,9 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
 
   {
     platform::RecordEvent compute_event(
-        "compute", platform::TracerEventType::OperatorInner, 1,
+        "compute",
+        platform::TracerEventType::OperatorInner,
+        1,
         platform::EventRole::kInnerOp);
     if (op_with_kernel == nullptr) {
       instr_node.OpBase()->Run(*local_scope, place_);
@@ -571,7 +583,8 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
   if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
     VLOG(4) << "Check nan/inf";
     framework::details::CheckOpHasNanOrInf(
-        *op, *global_scope_,
+        *op,
+        *global_scope_,
         place);  // TODO(xiongkun03) change it to inner scope.
   }
 }
@@ -596,10 +609,14 @@ void InterpreterCore::ExecuteInstructionList(
 
   for (size_t i = 0; i < dependecy_count_.size(); ++i) {
     if (dependecy_count_[i] == 0) {
-      async_work_queue_->AddTask(vec_instr.at(i).KernelType(), [
-        this, i, atomic_deps = atomic_deps.get(),
-        atomic_var_ref = atomic_var_ref.get()
-      ] { RunInstructionAsync(i, atomic_deps, atomic_var_ref); });
+      async_work_queue_->AddTask(vec_instr.at(i).KernelType(),
+                                 [this,
+                                  i,
+                                  atomic_deps = atomic_deps.get(),
+                                  atomic_var_ref = atomic_var_ref.get()] {
+                                   RunInstructionAsync(
+                                       i, atomic_deps, atomic_var_ref);
+                                 });
     }
   }
 
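For context on the reflowed `AddTask` call above: the lambda init-captures raw pointers obtained via `.get()` from owning smart pointers, so the queued task can update the shared atomic counters without taking ownership — which requires the owner to outlive the queued tasks. A small self-contained sketch of that capture style; `WorkQueue` is a hypothetical stand-in that runs tasks inline, whereas the real queue is asynchronous:

```cpp
#include <atomic>
#include <cstdio>
#include <functional>
#include <memory>
#include <vector>

// Hypothetical stand-in for async_work_queue_: runs tasks immediately.
struct WorkQueue {
  void AddTask(const std::function<void()>& task) { task(); }
};

int main() {
  // Owning pointer to the shared counters (vector<atomic> is constructed
  // in place; atomics are neither copyable nor movable).
  auto atomic_deps = std::make_unique<std::vector<std::atomic<size_t>>>(3);
  WorkQueue queue;
  for (size_t i = 0; i < atomic_deps->size(); ++i) {
    // Init-capture the raw pointer, not the unique_ptr: the lambda stays
    // copyable (std::function requires that) and takes no ownership.
    queue.AddTask([i, deps = atomic_deps.get()] {
      (*deps)[i].fetch_add(1, std::memory_order_relaxed);
    });
  }
  std::printf("dep[0] = %zu\n", (*atomic_deps)[0].load());
}
```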
@@ -615,7 +632,8 @@ void InterpreterCore::ExecuteInstructionList(
   }
   VLOG(4) << "Cancel ok";
   PADDLE_ENFORCE_EQ(
-      main_thread_blocker_.Clear(), 0,
+      main_thread_blocker_.Clear(),
+      0,
       platform::errors::PreconditionNotMet(
           "main_thread_blocker_.Clear() return -1, clear failed"));
   VLOG(4) << "clear ok";
@@ -624,7 +642,8 @@ void InterpreterCore::ExecuteInstructionList(
 }
 
 void InterpreterCore::RunNextInstructions(
-    const Instruction& instr, std::queue<size_t>* reserved_next_ops,
+    const Instruction& instr,
+    std::queue<size_t>* reserved_next_ops,
     std::vector<std::atomic<size_t>>* atomic_deps,
     std::vector<std::atomic<size_t>>* atomic_var_ref) {
   auto& next_instr = instr.NextInstructions();
@@ -691,7 +710,8 @@ void InterpreterCore::RunNextInstructions(
 }
 
 void InterpreterCore::RunInstructionAsync(
-    size_t instr_id, std::vector<std::atomic<size_t>>* atomic_deps,
+    size_t instr_id,
+    std::vector<std::atomic<size_t>>* atomic_deps,
     std::vector<std::atomic<size_t>>* atomic_var_ref) {
   std::queue<size_t> ready_ops;
   ready_ops.push(instr_id);
@@ -700,10 +720,10 @@ void InterpreterCore::RunInstructionAsync(
     ready_ops.pop();
     auto& instr_node = vec_instruction_.at(instr_id);
     VLOG(5) << __func__ << " OP id:" << instr_node.Id()
-            << " name:" << instr_node.OpBase()->Type()
-            << " type: " << (instr_node.KernelType() == OpFuncType::kQueueSync
-                                 ? "kQueueSync"
-                                 : "kQueueAsync")
+            << " name:" << instr_node.OpBase()->Type() << " type: "
+            << (instr_node.KernelType() == OpFuncType::kQueueSync
+                    ? "kQueueSync"
+                    : "kQueueAsync")
             << " runs on " << platform::GetCurrentThreadName();
 
     auto* op = instr_node.OpBase();
@@ -877,12 +897,14 @@ void InterpreterCore::CheckGC(
 
     } else {
       static_cast<InterpreterCoreEventGarbageCollector*>(gc_.get())->Add(
-          var_scope.Var(var_id), &gc_event_.at(instr_id),
+          var_scope.Var(var_id),
+          &gc_event_.at(instr_id),
           &instr.DeviceContext());
     }
 #else
     static_cast<InterpreterCoreEventGarbageCollector*>(gc_.get())->Add(
-        var_scope.Var(var_id), &gc_event_.at(instr_id),
+        var_scope.Var(var_id),
+        &gc_event_.at(instr_id),
         &instr.DeviceContext());
 #endif
   }
@@ -891,20 +913,24 @@ void InterpreterCore::CheckGC(
 
 void InterpreterCore::Prepare(
     const std::vector<std::string>& feed_names,
-    const std::vector<framework::LoDTensor>& feed_tensors, bool prepare_feed) {
-  PADDLE_ENFORCE_EQ(feed_names.size(), feed_tensors.size(),
+    const std::vector<framework::LoDTensor>& feed_tensors,
+    bool prepare_feed) {
+  PADDLE_ENFORCE_EQ(feed_names.size(),
+                    feed_tensors.size(),
                     platform::errors::PreconditionNotMet(
                         "Required feed_names.size() == feed_tensors.size(), "
                         "but received %d != %d",
-                        feed_names.size(), feed_tensors.size()));
+                        feed_names.size(),
+                        feed_tensors.size()));
 
   auto FeedInput = [&] {
     VLOG(4) << "Feed inputs";
     for (size_t i = 0; i < feed_names.size(); ++i) {
       auto* feed_var = global_scope_->FindVar(feed_names[i]);
       PADDLE_ENFORCE_NOT_NULL(
-          feed_var, platform::errors::NotFound(
-                        "Variable %s should not be nullptr.", feed_names[i]));
+          feed_var,
+          platform::errors::NotFound("Variable %s should not be nullptr.",
+                                     feed_names[i]));
 
       auto feed_tensor = feed_var->GetMutable<framework::LoDTensor>();
       feed_tensor->ShareDataWith(feed_tensors[i]);
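The `PADDLE_ENFORCE_EQ` / `PADDLE_ENFORCE_NOT_NULL` calls being reflowed in this hunk follow a check-then-formatted-error pattern: compare, and on failure raise an error built from a printf-style template. A toy standalone sketch of the same idea, assuming nothing about Paddle's actual macro machinery (`MINI_ENFORCE_EQ` is hypothetical and aborts instead of throwing):

```cpp
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

// Hypothetical miniature of the PADDLE_ENFORCE_EQ pattern: compare two
// values and abort with a printf-style message on mismatch.
#define MINI_ENFORCE_EQ(a, b, fmt, ...)              \
  do {                                               \
    if ((a) != (b)) {                                \
      std::fprintf(stderr, fmt "\n", __VA_ARGS__);   \
      std::abort();                                  \
    }                                                \
  } while (0)

int main() {
  std::vector<std::string> feed_names = {"x", "y"};
  std::vector<int> feed_tensors = {1, 2};
  MINI_ENFORCE_EQ(feed_names.size(), feed_tensors.size(),
                  "Required feed_names.size() == feed_tensors.size(), "
                  "but received %zu != %zu",  // %zu for size_t operands
                  feed_names.size(), feed_tensors.size());
  std::puts("sizes match");
}
```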
@@ -913,8 +939,8 @@ void InterpreterCore::Prepare(
   };
 
   if (!is_build_) {
-    paddle::framework::interpreter::build_variable_scope(block_, global_scope_,
-                                                         create_local_scope_);
+    paddle::framework::interpreter::build_variable_scope(
+        block_, global_scope_, create_local_scope_);
     FeedInput();
     std::vector<paddle::framework::OpFuncNode> op_func_nodes;
     paddle::framework::interpreter::build_op_func_list(