
Commit 283c4db

Add FP16 option in save_combine_op (#10471)
* Add FP16 option in save_combine_op
* Fix casting issue
1 parent 28a6037 commit 283c4db
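
For reference, here is a minimal sketch of how the new save_as_fp16 attribute is exercised from C++, mirroring the test added in this commit; the variable names, the scope/place objects, and the params.ls file name are illustrative, not part of the change:

// Save the LoDTensors "w1" and "w2" (already present in `scope`) as float16,
// then read them back with load_combine. Assumes the save_combine and
// load_combine ops are registered, as in the test file below.
paddle::framework::AttributeMap attrs;
attrs.insert({"file_path", std::string("params.ls")});  // illustrative path
attrs.insert({"save_as_fp16", true});                    // the new attribute

auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
    "save_combine", {{"X", {"w1", "w2"}}}, {}, attrs);
save_combine_op->Run(scope, place);

auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
    "load_combine", {}, {{"Out", {"w1_fp16", "w2_fp16"}}}, attrs);
load_combine_op->Run(scope, place);

After this round trip the loaded tensors hold float16 data, which the new test checks against float expectations via CheckValues<float, paddle::platform::float16>.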

2 files changed: +141 −35 lines changed

paddle/fluid/operators/save_combine_op.cc

Lines changed: 26 additions & 2 deletions
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <numeric>
 #include <sstream>
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -69,6 +70,7 @@ class SaveCombineOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     auto filename = Attr<std::string>("file_path");
     auto overwrite = Attr<bool>("overwrite");
+    auto save_as_fp16 = Attr<bool>("save_as_fp16");
 
     bool is_present = FileExists(filename);
     if (is_present && !overwrite) {
@@ -100,8 +102,24 @@ class SaveCombineOp : public framework::OperatorBase {
                      inp_var_names[i]);
 
       auto &tensor = var->Get<framework::LoDTensor>();
-      // Serialize tensor
-      framework::SerializeToStream(fout, tensor, dev_ctx);
+      // Serialize tensors one by one
+
+      // Check types to see if a fp16 transformation is required
+      auto in_dtype = framework::ToDataType(tensor.type());
+      auto out_dtype =
+          save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
+
+      if (in_dtype != out_dtype) {
+        auto in_kernel_type = framework::OpKernelType(in_dtype, place);
+        auto out_kernel_type = framework::OpKernelType(out_dtype, place);
+        framework::LoDTensor out;
+        // copy LoD info to the new tensor
+        out.set_lod(tensor.lod());
+        framework::TransDataType(in_kernel_type, out_kernel_type, tensor, &out);
+        framework::SerializeToStream(fout, out, dev_ctx);
+      } else {
+        framework::SerializeToStream(fout, tensor, dev_ctx);
+      }
     }
     fout.close();
   }
@@ -125,6 +143,12 @@ to a file on disk.
                   "(boolean, default true)"
                   "Overwrite the output file if it exists.")
         .SetDefault(true);
+    AddAttr<bool>("save_as_fp16",
+                  "(boolean, default false)"
+                  "If true, the tensor will be converted to float16 data "
+                  "type and then saved. Otherwise, the tensor will be "
+                  "directly saved without data type conversion.")
+        .SetDefault(false);
     AddAttr<std::string>(
         "file_path",
         "(string)"

paddle/fluid/operators/save_load_combine_op_test.cc

Lines changed: 115 additions & 33 deletions
@@ -17,15 +17,17 @@ limitations under the License. */
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/float16.h"
 
 USE_NO_KERNEL_OP(save_combine);
 USE_NO_KERNEL_OP(load_combine);
 
-int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
-                            std::string var_name,
-                            const paddle::platform::CPUPlace& place,
-                            paddle::framework::Scope* scope,
-                            paddle::framework::LoD* expect_lod) {
+template <typename T, typename U>
+T* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
+                          std::string var_name,
+                          const paddle::platform::CPUPlace& place,
+                          paddle::framework::Scope* scope,
+                          paddle::framework::LoD* expect_lod) {
   auto var = scope->Var(var_name);
   auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
   tensor->Resize({x, y});
@@ -34,9 +36,10 @@ int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
     (*expect_lod)[0].push_back(lod_info[i]);
   }
   tensor->set_lod(*expect_lod);
-  int* expect = tensor->mutable_data<int>(place);
+  T* expect = tensor->mutable_data<T>(place);
   for (int64_t i = 0; i < tensor->numel(); ++i) {
-    expect[i] = static_cast<int>(i);
+    expect[i] = static_cast<T>(
+        static_cast<U>(i));  // For FP16, we intend to do float(float16(i))
   }
   return expect;
 }
@@ -48,18 +51,20 @@ paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad(
   return target;
 }
 
-int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
-                                 const paddle::framework::Scope& scope,
-                                 paddle::framework::LoD* actual_lod) {
-  int* actual = target->data<int>();
+template <typename T>
+T* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
+                               const paddle::framework::Scope& scope,
+                               paddle::framework::LoD* actual_lod) {
+  T* actual = target->data<T>();
   *actual_lod = target->lod();
   return actual;
 }
 
-void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod,
-                 paddle::framework::LoD actual_lod, const int& numel) {
-  for (int64_t i = 0; i < numel; ++i) {
-    EXPECT_EQ(expect[i], actual[i]);
+template <typename T, typename U>
+void CheckValues(T* expect, U* actual, const paddle::framework::LoD& expect_lod,
+                 const paddle::framework::LoD& actual_lod, const int& numel) {
+  for (int i = 0; i < numel; ++i) {
+    EXPECT_EQ(expect[i], static_cast<T>(actual[i]));
   }
   EXPECT_EQ(expect_lod.size(), actual_lod.size());
   for (size_t i = 0; i < expect_lod.size(); ++i) {
@@ -78,26 +83,26 @@ TEST(SaveLoadCombineOp, CPU) {
   std::vector<int> lod1 = {0, 1, 2, 3, 10};
   int numel1 = 100;
   paddle::framework::LoD expect_lod1;
-  int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place,
-                                        &scope, &expect_lod1);
+  int* expect1 = CreateForSaveCombineOp<int, int>(10, 10, lod1, "test_var1",
+                                                  place, &scope, &expect_lod1);
 
   std::vector<int> lod2 = {0, 2, 5, 10};
   int numel2 = 200;
   paddle::framework::LoD expect_lod2;
-  int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place,
-                                        &scope, &expect_lod2);
+  int* expect2 = CreateForSaveCombineOp<int, int>(10, 20, lod2, "test_var2",
+                                                  place, &scope, &expect_lod2);
 
   std::vector<int> lod3 = {0, 2, 3, 20};
   int numel3 = 4000;
   paddle::framework::LoD expect_lod3;
-  int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place,
-                                        &scope, &expect_lod3);
+  int* expect3 = CreateForSaveCombineOp<int, int>(20, 200, lod3, "test_var3",
+                                                  place, &scope, &expect_lod3);
 
   std::vector<int> lod4 = {0, 1, 20};
   int numel4 = 1000;
   paddle::framework::LoD expect_lod4;
-  int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place,
-                                        &scope, &expect_lod4);
+  int* expect4 = CreateForSaveCombineOp<int, int>(20, 50, lod4, "test_var4",
+                                                  place, &scope, &expect_lod4);
 
   // Set attributes
   std::string filename = "check_tensor.ls";
@@ -123,15 +128,92 @@ TEST(SaveLoadCombineOp, CPU) {
   load_combine_op->Run(scope, place);
 
   paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
-  int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, &actual_lod1);
-  int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, &actual_lod2);
-  int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, &actual_lod3);
-  int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, &actual_lod4);
-
-  CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1);
-  CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2);
-  CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3);
-  CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4);
+  int* actual1 = GetValuesAfterLoadCombineOp<int>(target1, scope, &actual_lod1);
+  int* actual2 = GetValuesAfterLoadCombineOp<int>(target2, scope, &actual_lod2);
+  int* actual3 = GetValuesAfterLoadCombineOp<int>(target3, scope, &actual_lod3);
+  int* actual4 = GetValuesAfterLoadCombineOp<int>(target4, scope, &actual_lod4);
+
+  CheckValues<int, int>(expect1, actual1, expect_lod1, actual_lod1, numel1);
+  CheckValues<int, int>(expect2, actual2, expect_lod2, actual_lod2, numel2);
+  CheckValues<int, int>(expect3, actual3, expect_lod3, actual_lod3, numel3);
+  CheckValues<int, int>(expect4, actual4, expect_lod4, actual_lod4, numel4);
+}
+
+// FP16 version of SaveLoadCombineOp Test
+TEST(SaveLoadCombineFP16Op, CPU) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace place;
+
+  std::vector<int> lod1 = {0, 1, 2, 3, 10};
+  int numel1 = 100;
+  paddle::framework::LoD expect_lod1;
+  float* expect1 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 10, lod1, "test_var1", place, &scope, &expect_lod1);
+
+  std::vector<int> lod2 = {0, 2, 5, 10};
+  int numel2 = 200;
+  paddle::framework::LoD expect_lod2;
+  float* expect2 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 20, lod2, "test_var2", place, &scope, &expect_lod2);
+
+  std::vector<int> lod3 = {0, 20};
+  int numel3 = 4000;
+  paddle::framework::LoD expect_lod3;
+  float* expect3 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 200, lod3, "test_var3", place, &scope, &expect_lod3);
+
+  std::vector<int> lod4 = {0, 1, 20};
+  int numel4 = 1000;
+  paddle::framework::LoD expect_lod4;
+  float* expect4 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 50, lod4, "test_var4", place, &scope, &expect_lod4);
+
+  // Set attributes
+  std::string filename = "check_tensor_fp16.ls";
+  paddle::framework::AttributeMap attrs;
+  attrs.insert({"file_path", std::string(filename)});
+  attrs.insert({"save_as_fp16", true});
+
+  // Run the save_combine_op
+  auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "save_combine",
+      {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
+  save_combine_op->Run(scope, place);
+
+  // Set up output vars
+  auto target1 = GeneratePlaceholderBeforeLoad("out_var1", &scope);
+  auto target2 = GeneratePlaceholderBeforeLoad("out_var2", &scope);
+  auto target3 = GeneratePlaceholderBeforeLoad("out_var3", &scope);
+  auto target4 = GeneratePlaceholderBeforeLoad("out_var4", &scope);
+
+  // Run the load_combine_op
+  auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "load_combine", {},
+      {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
+  load_combine_op->Run(scope, place);
+
+  paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
+  paddle::platform::float16* actual1 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target1, scope,
+                                                             &actual_lod1);
+  paddle::platform::float16* actual2 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target2, scope,
                                                             &actual_lod2);
+  paddle::platform::float16* actual3 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target3, scope,
+                                                             &actual_lod3);
+  paddle::platform::float16* actual4 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target4, scope,
                                                             &actual_lod4);
+
+  CheckValues<float, paddle::platform::float16>(expect1, actual1, expect_lod1,
+                                                actual_lod1, numel1);
+  CheckValues<float, paddle::platform::float16>(expect2, actual2, expect_lod2,
+                                                actual_lod2, numel2);
+  CheckValues<float, paddle::platform::float16>(expect3, actual3, expect_lod3,
+                                                actual_lod3, numel3);
+  CheckValues<float, paddle::platform::float16>(expect4, actual4, expect_lod4,
+                                                actual_lod4, numel4);
 }
 
 // Test with original SaveLoadTest
@@ -141,7 +223,7 @@ TEST(SaveLoadTestWithCombineOp, CPU) {
 
   auto var = scope.Var("test_var");
   auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
-  tensor->Resize({3, 10});
+  tensor->Resize({3, 4000});
   paddle::framework::LoD expect_lod;
   expect_lod.resize(1);
   expect_lod[0].push_back(0);
