PaddlePaddle
diff --git a/‎doc/design/float16.md
Lines changed: 1 addition & 1 deletion b/‎doc/design/float16.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/framework/op_registry.h
Lines changed: 1 addition & 2 deletions b/‎paddle/framework/op_registry.h
Lines changed: 1 addition & 2 deletions
diff --git a/‎paddle/framework/operator.cc
Lines changed: 34 additions & 3 deletions b/‎paddle/framework/operator.cc
Lines changed: 34 additions & 3 deletions
diff --git a/‎paddle/framework/operator.h
Lines changed: 27 additions & 52 deletions b/‎paddle/framework/operator.h
Lines changed: 27 additions & 52 deletions
diff --git a/‎paddle/framework/operator_test.cc
Lines changed: 2 additions & 2 deletions b/‎paddle/framework/operator_test.cc
Lines changed: 2 additions & 2 deletions
diff --git a/‎paddle/operators/accuracy_op.cc
Lines changed: 4 additions & 3 deletions b/‎paddle/operators/accuracy_op.cc
Lines changed: 4 additions & 3 deletions
diff --git a/‎paddle/operators/auc_op.cc
Lines changed: 4 additions & 3 deletions b/‎paddle/operators/auc_op.cc
Lines changed: 4 additions & 3 deletions
diff --git a/‎paddle/operators/batch_norm_op.cc
Lines changed: 4 additions & 2 deletions b/‎paddle/operators/batch_norm_op.cc
Lines changed: 4 additions & 2 deletions
diff --git a/‎paddle/operators/crf_decoding_op.cc
Lines changed: 4 additions & 2 deletions b/‎paddle/operators/crf_decoding_op.cc
Lines changed: 4 additions & 2 deletions
diff --git a/‎paddle/operators/cross_entropy_op.cc
Lines changed: 8 additions & 4 deletions b/‎paddle/operators/cross_entropy_op.cc
Lines changed: 8 additions & 4 deletions
@@ -55,6 +55,6 @@ After float16 class is available, some of the future items are below:
 
 - Update pybind/tensor_py.h to bind C++ float16 with numpy float16.
 
-- Modify `IndicateDataType()` method in `framework/operator.h` to make it compatible with float16.
+- Modify `GetKernelType()` method in `framework/operator.h` to make it compatible with float16.
 
 - Create a type-casting operator that can convert the data type in tensor between float16 and other types.
@@ -92,8 +92,7 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
 
   void operator()(const char* op_type) const {
     using T = typename KERNEL_TYPE::ELEMENT_TYPE;
-    OperatorWithKernel::OpKernelKey key(ToDataType(std::type_index(typeid(T))),
-                                        PlaceType());
+    OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType());
     OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
 
     constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
 
@@ -254,8 +254,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
   return res;
 }
 
-std::ostream& operator<<(std::ostream& os,
-                         const OperatorWithKernel::OpKernelKey& kernel_key) {
+std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key) {
   os << "place[" << kernel_key.place_ << "]:data_type[" << kernel_key.data_type_
      << "]";
   return os;
@@ -432,7 +431,7 @@ void OperatorWithKernel::Run(const Scope& scope,
 
   // check if op[type] have kernel for kernel_key
   OpKernelMap& kernels = kernels_iter->second;
-  auto kernel_key = OpKernelKey(IndicateDataType(ctx), dev_ctx);
+  auto kernel_key = GetKernelType(ctx);
   auto kernel_iter = kernels.find(kernel_key);
 
   if (kernel_iter == kernels.end()) {
@@ -444,6 +443,38 @@ void OperatorWithKernel::Run(const Scope& scope,
   // throws errors if have.
   dev_ctx.Finish();
 }
+OpKernelType OperatorWithKernel::GetKernelType(
+    const ExecutionContext& ctx) const {
+  return OpKernelType(IndicateDataType(ctx), ctx.device_context());
+}
+DataType OperatorWithKernel::IndicateDataType(
+    const ExecutionContext& ctx) const {
+  auto& scope = ctx.scope();
+  int data_type = -1;
+  for (auto& input : this->inputs_) {
+    for (auto& ipt_name : input.second) {
+      auto* var = scope.FindVar(ipt_name);
+      if (var != nullptr) {
+        const Tensor* t = nullptr;
+        if (var->IsType<Tensor>()) {
+          t = &var->Get<Tensor>();
+        } else if (var->IsType<LoDTensor>()) {
+          t = &var->Get<LoDTensor>();
+        } else if (var->IsType<SelectedRows>()) {
+          t = &(var->Get<SelectedRows>().value());
+        }
+        if (t != nullptr) {
+          int tmp = static_cast<int>(ToDataType(t->type()));
+          PADDLE_ENFORCE(tmp == data_type || data_type == -1,
+                         "DataType of Paddle Op %s must be the same.", Type());
+          data_type = tmp;
+        }
+      }
+    }
+  }
+  PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input");
+  return static_cast<DataType>(data_type);
+}
 
 }  // namespace framework
 }  // namespace paddle
@@ -345,27 +345,10 @@ class OpKernel : public OpKernelBase {
   using ELEMENT_TYPE = T;
 };
 
-class OperatorWithKernel : public OperatorBase {
- public:
-  struct OpKernelKey {
-    platform::Place place_;
-    DataType data_type_;
-
-    OpKernelKey(DataType data_type, platform::Place place)
-        : place_(place), data_type_(data_type) {}
-
-    OpKernelKey(DataType data_type, const platform::DeviceContext& dev_ctx)
-        : place_(dev_ctx.GetPlace()), data_type_(data_type) {}
-
-    bool operator==(const OpKernelKey& o) const {
-      return platform::places_are_same_class(place_, o.place_) &&
-             data_type_ == o.data_type_;
-    }
-  };
-
-  struct OpKernelHash {
+struct OpKernelType {
+  struct Hash {
     std::hash<int> hash_;
-    size_t operator()(const OpKernelKey& key) const {
+    size_t operator()(const OpKernelType& key) const {
       int place = key.place_.which();
       int data_type = static_cast<int>(key.data_type_);
       int pre_hash = data_type << NUM_PLACE_TYPE_LIMIT_IN_BIT |
@@ -374,9 +357,26 @@ class OperatorWithKernel : public OperatorBase {
     }
   };
 
+  platform::Place place_;
+  DataType data_type_;
+
+  OpKernelType(DataType data_type, platform::Place place)
+      : place_(place), data_type_(data_type) {}
+
+  OpKernelType(DataType data_type, const platform::DeviceContext& dev_ctx)
+      : place_(dev_ctx.GetPlace()), data_type_(data_type) {}
+
+  bool operator==(const OpKernelType& o) const {
+    return platform::places_are_same_class(place_, o.place_) &&
+           data_type_ == o.data_type_;
+  }
+};
+
+class OperatorWithKernel : public OperatorBase {
+ public:
   using OpKernelMap =
-      std::unordered_map<OpKernelKey, std::unique_ptr<OpKernelBase>,
-                         OpKernelHash>;
+      std::unordered_map<OpKernelType, std::unique_ptr<OpKernelBase>,
+                         OpKernelType::Hash>;
 
   OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
                      const VariableNameMap& outputs, const AttributeMap& attrs)
@@ -404,40 +404,15 @@ class OperatorWithKernel : public OperatorBase {
   }
 
  protected:
+  virtual OpKernelType GetKernelType(const ExecutionContext& ctx) const;
+
+ private:
   // Indicate the kernel DataType from the input data. By default, all inputs
   // must have the same data type.
-  virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
-    auto& scope = ctx.scope();
-    int data_type = -1;
-    for (auto& input : this->inputs_) {
-      for (auto& ipt_name : input.second) {
-        auto* var = scope.FindVar(ipt_name);
-        if (var != nullptr) {
-          const Tensor* t = nullptr;
-          if (var->IsType<Tensor>()) {
-            t = &var->Get<Tensor>();
-          } else if (var->IsType<LoDTensor>()) {
-            t = &var->Get<LoDTensor>();
-          } else if (var->IsType<SelectedRows>()) {
-            t = &(var->Get<SelectedRows>().value());
-          }
-          if (t != nullptr) {
-            int tmp = static_cast<int>(ToDataType(t->type()));
-            PADDLE_ENFORCE(tmp == data_type || data_type == -1,
-                           "DataType of Paddle Op %s must be the same.",
-                           Type());
-            data_type = tmp;
-          }
-        }
-      }
-    }
-    PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input");
-    return static_cast<DataType>(data_type);
-  }
+  DataType IndicateDataType(const ExecutionContext& ctx) const;
 };
 
-std::ostream& operator<<(std::ostream& os,
-                         const OperatorWithKernel::OpKernelKey& kernel_key);
+std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key);
 
 extern bool OpSupportGPU(const std::string& op_type);
 
 
@@ -114,8 +114,8 @@ class OpWithKernelTest : public OperatorWithKernel {
 
  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {}
-  DataType IndicateDataType(const ExecutionContext& ctx) const override {
-    return DataType::FP32;
+  OpKernelType GetKernelType(const ExecutionContext& ctx) const override {
+    return OpKernelType(DataType::FP32, ctx.device_context());
   }
 };
 
 
@@ -47,10 +47,11 @@ class AccuracyOp : public framework::OperatorWithKernel {
   }
 
  protected:
-  // IndicateDataType
-  framework::DataType IndicateDataType(
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext &ctx) const override {
-    return framework::ToDataType(ctx.Input<Tensor>("Out")->type());
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("Out")->type()),
+        ctx.device_context());
   }
 };
 
 
@@ -39,10 +39,11 @@ class AucOp : public framework::OperatorWithKernel {
   }
 
  protected:
-  // IndicateDataType
-  framework::DataType IndicateDataType(
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext &ctx) const override {
-    return framework::ToDataType(ctx.Input<Tensor>("Out")->type());
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("Out")->type()),
+        ctx.device_context());
   }
 };
 
 
@@ -303,7 +303,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim(framework::GradVarName("Bias"), {C});
   }
 
-  framework::DataType IndicateDataType(
+ protected:
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext &ctx) const override {
     const auto *var = ctx.InputVar(framework::GradVarName("Y"));
     if (var == nullptr) {
@@ -318,7 +319,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
     if (t == nullptr) {
       PADDLE_THROW("can't find Y@GRAD");
     }
-    return framework::ToDataType(t->type());
+    return framework::OpKernelType(framework::ToDataType(t->type()),
+                                   ctx.device_context());
   }
 };
 
 
@@ -120,9 +120,11 @@ class CRFDecodingOp : public framework::OperatorWithKernel {
   }
 
  protected:
-  framework::DataType IndicateDataType(
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type()),
+        ctx.device_context());
   }
 };
 }  // namespace operators
 
@@ -51,9 +51,11 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
  protected:
   // Explicitly set that the data type of computation kernel of cross_entropy
   // is determined by its input "X".
-  framework::DataType IndicateDataType(
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::ToDataType(ctx.Input<Tensor>("X")->type());
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()),
+        ctx.device_context());
   }
 };
 
@@ -98,9 +100,11 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
  protected:
   // Explicitly set that the data type of computation kernel of cross_entropy
   // is determined by its input "X".
-  framework::DataType IndicateDataType(
+  framework::OpKernelType GetKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::ToDataType(ctx.Input<Tensor>("X")->type());
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()),
+        ctx.device_context());
   }
 };
Original file line number	Diff line number	Diff line change
`@@ -114,8 +114,8 @@ class OpWithKernelTest : public OperatorWithKernel {`
`114`	`114`
`115`	`115`	`protected:`
`116`	`116`	`void InferShape(framework::InferShapeContext* ctx) const override {}`
`117`		`- DataType IndicateDataType(const ExecutionContext& ctx) const override {`
`118`		`- return DataType::FP32;`
	`117`	`+ OpKernelType GetKernelType(const ExecutionContext& ctx) const override {`
	`118`	`+ return OpKernelType(DataType::FP32, ctx.device_context());`
`119`	`119`	`}`
`120`	`120`	`};`
`121`	`121`
Original file line number	Diff line number	Diff line change
`@@ -47,10 +47,11 @@ class AccuracyOp : public framework::OperatorWithKernel {`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`protected:`
`50`		`- // IndicateDataType`
`51`		`- framework::DataType IndicateDataType(`
	`50`	`+ framework::OpKernelType GetKernelType(`
`52`	`51`	`const framework::ExecutionContext &ctx) const override {`
`53`		`- return framework::ToDataType(ctx.Input<Tensor>("Out")->type());`
	`52`	`+ return framework::OpKernelType(`
	`53`	`+ framework::ToDataType(ctx.Input<Tensor>("Out")->type()),`
	`54`	`+ ctx.device_context());`
`54`	`55`	`}`
`55`	`56`	`};`
`56`	`57`
Original file line number	Diff line number	Diff line change
`@@ -39,10 +39,11 @@ class AucOp : public framework::OperatorWithKernel {`
`39`	`39`	`}`
`40`	`40`
`41`	`41`	`protected:`
`42`		`- // IndicateDataType`
`43`		`- framework::DataType IndicateDataType(`
	`42`	`+ framework::OpKernelType GetKernelType(`
`44`	`43`	`const framework::ExecutionContext &ctx) const override {`
`45`		`- return framework::ToDataType(ctx.Input<Tensor>("Out")->type());`
	`44`	`+ return framework::OpKernelType(`
	`45`	`+ framework::ToDataType(ctx.Input<Tensor>("Out")->type()),`
	`46`	`+ ctx.device_context());`
`46`	`47`	`}`
`47`	`48`	`};`
`48`	`49`
Original file line number	Diff line number	Diff line change
`@@ -303,7 +303,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel {`
`303`	`303`	`ctx->SetOutputDim(framework::GradVarName("Bias"), {C});`
`304`	`304`	`}`
`305`	`305`
`306`		`- framework::DataType IndicateDataType(`
	`306`	`+ protected:`
	`307`	`+ framework::OpKernelType GetKernelType(`
`307`	`308`	`const framework::ExecutionContext &ctx) const override {`
`308`	`309`	`const auto *var = ctx.InputVar(framework::GradVarName("Y"));`
`309`	`310`	`if (var == nullptr) {`
`@@ -318,7 +319,8 @@ class BatchNormGradOp : public framework::OperatorWithKernel {`
`318`	`319`	`if (t == nullptr) {`
`319`	`320`	`PADDLE_THROW("can't find Y@GRAD");`
`320`	`321`	`}`
`321`		`- return framework::ToDataType(t->type());`
	`322`	`+ return framework::OpKernelType(framework::ToDataType(t->type()),`
	`323`	`+ ctx.device_context());`
`322`	`324`	`}`
`323`	`325`	`};`
`324`	`326`
Original file line number	Diff line number	Diff line change
`@@ -120,9 +120,11 @@ class CRFDecodingOp : public framework::OperatorWithKernel {`
`120`	`120`	`}`
`121`	`121`
`122`	`122`	`protected:`
`123`		`- framework::DataType IndicateDataType(`
	`123`	`+ framework::OpKernelType GetKernelType(`
`124`	`124`	`const framework::ExecutionContext& ctx) const override {`
`125`		`- return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());`
	`125`	`+ return framework::OpKernelType(`
	`126`	`+ framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type()),`
	`127`	`+ ctx.device_context());`
`126`	`128`	`}`
`127`	`129`	`};`
`128`	`130`	`} // namespace operators`