
Commit 283c4db

Add FP16 option in save_combine_op (#10471)
* Add FP16 option in save_combine_op
* Fix casting issue
1 parent 28a6037 commit 283c4db
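
For reference, here is a minimal sketch of how the new save_as_fp16 attribute is exercised from C++, mirroring the test added in this commit; the variable names, the scope/place objects, and the params.ls file name are illustrative, not part of the change:

// Save the LoDTensors "w1" and "w2" (already present in `scope`) as float16,
// then read them back with load_combine. Assumes the save_combine and
// load_combine ops are registered, as in the test file below.
paddle::framework::AttributeMap attrs;
attrs.insert({"file_path", std::string("params.ls")});  // illustrative path
attrs.insert({"save_as_fp16", true});                    // the new attribute

auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
    "save_combine", {{"X", {"w1", "w2"}}}, {}, attrs);
save_combine_op->Run(scope, place);

auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
    "load_combine", {}, {{"Out", {"w1_fp16", "w2_fp16"}}}, attrs);
load_combine_op->Run(scope, place);

After this round trip the loaded tensors hold float16 data, which the new test checks against float expectations via CheckValues<float, paddle::platform::float16>.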

2 files changed: +141 −35 lines changed

paddle/fluid/operators/save_combine_op.cc

Lines changed: 26 additions & 2 deletions
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <numeric>
 #include <sstream>
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -69,6 +70,7 @@ class SaveCombineOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     auto filename = Attr<std::string>("file_path");
     auto overwrite = Attr<bool>("overwrite");
+    auto save_as_fp16 = Attr<bool>("save_as_fp16");
 
     bool is_present = FileExists(filename);
     if (is_present && !overwrite) {
@@ -100,8 +102,24 @@ class SaveCombineOp : public framework::OperatorBase {
                      inp_var_names[i]);
 
       auto &tensor = var->Get<framework::LoDTensor>();
-      // Serialize tensor
-      framework::SerializeToStream(fout, tensor, dev_ctx);
+      // Serialize tensors one by one
+
+      // Check types to see if a fp16 transformation is required
+      auto in_dtype = framework::ToDataType(tensor.type());
+      auto out_dtype =
+          save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
+
+      if (in_dtype != out_dtype) {
+        auto in_kernel_type = framework::OpKernelType(in_dtype, place);
+        auto out_kernel_type = framework::OpKernelType(out_dtype, place);
+        framework::LoDTensor out;
+        // copy LoD info to the new tensor
+        out.set_lod(tensor.lod());
+        framework::TransDataType(in_kernel_type, out_kernel_type, tensor, &out);
+        framework::SerializeToStream(fout, out, dev_ctx);
+      } else {
+        framework::SerializeToStream(fout, tensor, dev_ctx);
+      }
     }
     fout.close();
   }
@@ -125,6 +143,12 @@ to a file on disk.
                   "(boolean, default true)"
                   "Overwrite the output file if it exists.")
         .SetDefault(true);
+    AddAttr<bool>("save_as_fp16",
+                  "(boolean, default false)"
+                  "If true, the tensor will be converted to float16 data "
+                  "type and then saved. Otherwise, the tensor will be "
+                  "directly saved without data type conversion.")
+        .SetDefault(false);
     AddAttr<std::string>(
         "file_path",
         "(string)"

paddle/fluid/operators/save_load_combine_op_test.cc

Lines changed: 115 additions & 33 deletions
@@ -17,15 +17,17 @@ limitations under the License. */
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/float16.h"
 
 USE_NO_KERNEL_OP(save_combine);
 USE_NO_KERNEL_OP(load_combine);
 
-int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
-                            std::string var_name,
-                            const paddle::platform::CPUPlace& place,
-                            paddle::framework::Scope* scope,
-                            paddle::framework::LoD* expect_lod) {
+template <typename T, typename U>
+T* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
+                          std::string var_name,
+                          const paddle::platform::CPUPlace& place,
+                          paddle::framework::Scope* scope,
+                          paddle::framework::LoD* expect_lod) {
   auto var = scope->Var(var_name);
   auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
   tensor->Resize({x, y});
@@ -34,9 +36,10 @@ int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
     (*expect_lod)[0].push_back(lod_info[i]);
   }
   tensor->set_lod(*expect_lod);
-  int* expect = tensor->mutable_data<int>(place);
+  T* expect = tensor->mutable_data<T>(place);
   for (int64_t i = 0; i < tensor->numel(); ++i) {
-    expect[i] = static_cast<int>(i);
+    expect[i] = static_cast<T>(
+        static_cast<U>(i));  // For FP16, we intend to do float(float16(i))
   }
   return expect;
 }
@@ -48,18 +51,20 @@ paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad(
   return target;
 }
 
-int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
-                                 const paddle::framework::Scope& scope,
-                                 paddle::framework::LoD* actual_lod) {
-  int* actual = target->data<int>();
+template <typename T>
+T* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
+                               const paddle::framework::Scope& scope,
+                               paddle::framework::LoD* actual_lod) {
+  T* actual = target->data<T>();
   *actual_lod = target->lod();
   return actual;
 }
 
-void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod,
-                 paddle::framework::LoD actual_lod, const int& numel) {
-  for (int64_t i = 0; i < numel; ++i) {
-    EXPECT_EQ(expect[i], actual[i]);
+template <typename T, typename U>
+void CheckValues(T* expect, U* actual, const paddle::framework::LoD& expect_lod,
+                 const paddle::framework::LoD& actual_lod, const int& numel) {
+  for (int i = 0; i < numel; ++i) {
+    EXPECT_EQ(expect[i], static_cast<T>(actual[i]));
   }
   EXPECT_EQ(expect_lod.size(), actual_lod.size());
   for (size_t i = 0; i < expect_lod.size(); ++i) {
@@ -78,26 +83,26 @@ TEST(SaveLoadCombineOp, CPU) {
   std::vector<int> lod1 = {0, 1, 2, 3, 10};
   int numel1 = 100;
   paddle::framework::LoD expect_lod1;
-  int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place,
-                                        &scope, &expect_lod1);
+  int* expect1 = CreateForSaveCombineOp<int, int>(10, 10, lod1, "test_var1",
+                                                  place, &scope, &expect_lod1);
 
   std::vector<int> lod2 = {0, 2, 5, 10};
   int numel2 = 200;
   paddle::framework::LoD expect_lod2;
-  int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place,
-                                        &scope, &expect_lod2);
+  int* expect2 = CreateForSaveCombineOp<int, int>(10, 20, lod2, "test_var2",
+                                                  place, &scope, &expect_lod2);
 
   std::vector<int> lod3 = {0, 2, 3, 20};
   int numel3 = 4000;
   paddle::framework::LoD expect_lod3;
-  int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place,
-                                        &scope, &expect_lod3);
+  int* expect3 = CreateForSaveCombineOp<int, int>(20, 200, lod3, "test_var3",
+                                                  place, &scope, &expect_lod3);
 
   std::vector<int> lod4 = {0, 1, 20};
   int numel4 = 1000;
   paddle::framework::LoD expect_lod4;
-  int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place,
-                                        &scope, &expect_lod4);
+  int* expect4 = CreateForSaveCombineOp<int, int>(20, 50, lod4, "test_var4",
+                                                  place, &scope, &expect_lod4);
 
   // Set attributes
   std::string filename = "check_tensor.ls";
@@ -123,15 +128,92 @@ TEST(SaveLoadCombineOp, CPU) {
   load_combine_op->Run(scope, place);
 
   paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
-  int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, &actual_lod1);
-  int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, &actual_lod2);
-  int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, &actual_lod3);
-  int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, &actual_lod4);
-
-  CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1);
-  CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2);
-  CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3);
-  CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4);
+  int* actual1 = GetValuesAfterLoadCombineOp<int>(target1, scope, &actual_lod1);
+  int* actual2 = GetValuesAfterLoadCombineOp<int>(target2, scope, &actual_lod2);
+  int* actual3 = GetValuesAfterLoadCombineOp<int>(target3, scope, &actual_lod3);
+  int* actual4 = GetValuesAfterLoadCombineOp<int>(target4, scope, &actual_lod4);
+
+  CheckValues<int, int>(expect1, actual1, expect_lod1, actual_lod1, numel1);
+  CheckValues<int, int>(expect2, actual2, expect_lod2, actual_lod2, numel2);
+  CheckValues<int, int>(expect3, actual3, expect_lod3, actual_lod3, numel3);
+  CheckValues<int, int>(expect4, actual4, expect_lod4, actual_lod4, numel4);
+}
+
+// FP16 version of SaveLoadCombineOp Test
+TEST(SaveLoadCombineFP16Op, CPU) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace place;
+
+  std::vector<int> lod1 = {0, 1, 2, 3, 10};
+  int numel1 = 100;
+  paddle::framework::LoD expect_lod1;
+  float* expect1 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 10, lod1, "test_var1", place, &scope, &expect_lod1);
+
+  std::vector<int> lod2 = {0, 2, 5, 10};
+  int numel2 = 200;
+  paddle::framework::LoD expect_lod2;
+  float* expect2 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      10, 20, lod2, "test_var2", place, &scope, &expect_lod2);
+
+  std::vector<int> lod3 = {0, 20};
+  int numel3 = 4000;
+  paddle::framework::LoD expect_lod3;
+  float* expect3 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 200, lod3, "test_var3", place, &scope, &expect_lod3);
+
+  std::vector<int> lod4 = {0, 1, 20};
+  int numel4 = 1000;
+  paddle::framework::LoD expect_lod4;
+  float* expect4 = CreateForSaveCombineOp<float, paddle::platform::float16>(
+      20, 50, lod4, "test_var4", place, &scope, &expect_lod4);
+
+  // Set attributes
+  std::string filename = "check_tensor_fp16.ls";
+  paddle::framework::AttributeMap attrs;
+  attrs.insert({"file_path", std::string(filename)});
+  attrs.insert({"save_as_fp16", true});
+
+  // Run the save_combine_op
+  auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "save_combine",
+      {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
+  save_combine_op->Run(scope, place);
+
+  // Set up output vars
+  auto target1 = GeneratePlaceholderBeforeLoad("out_var1", &scope);
+  auto target2 = GeneratePlaceholderBeforeLoad("out_var2", &scope);
+  auto target3 = GeneratePlaceholderBeforeLoad("out_var3", &scope);
+  auto target4 = GeneratePlaceholderBeforeLoad("out_var4", &scope);
+
+  // Run the load_combine_op
+  auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
+      "load_combine", {},
+      {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
+  load_combine_op->Run(scope, place);
+
+  paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
+  paddle::platform::float16* actual1 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target1, scope,
+                                                             &actual_lod1);
+  paddle::platform::float16* actual2 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target2, scope,
                                                             &actual_lod2);
+  paddle::platform::float16* actual3 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target3, scope,
+                                                             &actual_lod3);
+  paddle::platform::float16* actual4 =
+      GetValuesAfterLoadCombineOp<paddle::platform::float16>(target4, scope,
                                                             &actual_lod4);
+
+  CheckValues<float, paddle::platform::float16>(expect1, actual1, expect_lod1,
+                                                actual_lod1, numel1);
+  CheckValues<float, paddle::platform::float16>(expect2, actual2, expect_lod2,
+                                                actual_lod2, numel2);
+  CheckValues<float, paddle::platform::float16>(expect3, actual3, expect_lod3,
+                                                actual_lod3, numel3);
+  CheckValues<float, paddle::platform::float16>(expect4, actual4, expect_lod4,
+                                                actual_lod4, numel4);
 }
 
 // Test with original SaveLoadTest
@@ -141,7 +223,7 @@ TEST(SaveLoadTestWithCombineOp, CPU) {
 
   auto var = scope.Var("test_var");
   auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
-  tensor->Resize({3, 10});
+  tensor->Resize({3, 4000});
   paddle::framework::LoD expect_lod;
   expect_lod.resize(1);
   expect_lod[0].push_back(0);
