Skip to content

Commit 6af0593

Browse files
authored
Add FP16 option to load_combine op (#10601)
1 parent 5f6fd26 commit 6af0593

File tree

2 files changed

+113
-13
lines changed

2 files changed

+113
-13
lines changed

paddle/fluid/operators/load_combine_op.cc

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414
#include <fstream>
15-
15+
#include "paddle/fluid/framework/data_type_transform.h"
1616
#include "paddle/fluid/framework/op_registry.h"
1717
#include "paddle/fluid/platform/device_context.h"
1818

@@ -31,6 +31,7 @@ class LoadCombineOp : public framework::OperatorBase {
3131
void RunImpl(const framework::Scope &scope,
3232
const platform::Place &place) const override {
3333
auto filename = Attr<std::string>("file_path");
34+
auto load_as_fp16 = Attr<bool>("load_as_fp16");
3435

3536
std::ifstream fin(filename);
3637
PADDLE_ENFORCE(static_cast<bool>(fin),
@@ -59,17 +60,25 @@ class LoadCombineOp : public framework::OperatorBase {
5960
// Get data from fin to tensor
6061
DeserializeFromStream(fin, tensor, dev_ctx);
6162

62-
if (platform::is_gpu_place(place)) {
63-
// copy CPU to GPU
64-
framework::LoDTensor cpu_tensor;
65-
cpu_tensor.ShareDataWith(*tensor);
66-
cpu_tensor.set_lod(tensor->lod());
67-
68-
// reset tensor
63+
auto in_dtype = framework::ToDataType(tensor->type());
64+
auto out_dtype =
65+
load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
66+
67+
if (in_dtype != out_dtype) {
68+
// convert to float16 tensor
69+
auto in_kernel_type = framework::OpKernelType(in_dtype, place);
70+
auto out_kernel_type = framework::OpKernelType(out_dtype, place);
71+
framework::LoDTensor fp16_tensor;
72+
// copy LoD info to the new tensor
73+
fp16_tensor.set_lod(tensor->lod());
74+
framework::TransDataType(in_kernel_type, out_kernel_type, *tensor,
75+
&fp16_tensor);
76+
77+
// reset output tensor
6978
out_var->Clear();
7079
tensor = out_var->GetMutable<framework::LoDTensor>();
71-
tensor->set_lod(cpu_tensor.lod());
72-
TensorCopy(cpu_tensor, place, dev_ctx, tensor);
80+
tensor->set_lod(fp16_tensor.lod());
81+
tensor->ShareDataWith(fp16_tensor);
7382
}
7483
}
7584
}
@@ -82,6 +91,13 @@ class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
8291
"Out",
8392
"(vector) The output LoDTensors that will be read from the input file.")
8493
.AsDuplicable();
94+
AddAttr<bool>(
95+
"load_as_fp16",
96+
"(boolean, default false)"
97+
"If true, the tensor will be first loaded and then "
98+
"converted to float16 data type. Otherwise, the tensor will be "
99+
"directly loaded without data type conversion.")
100+
.SetDefault(false);
85101
AddAttr<std::string>("file_path",
86102
"(string) "
87103
"LoDTensors will be loaded from \"file_path\".")

paddle/fluid/operators/save_load_combine_op_test.cc

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,9 @@ TEST(SaveLoadCombineOp, CPU) {
139139
CheckValues<int, int>(expect4, actual4, expect_lod4, actual_lod4, numel4);
140140
}
141141

142-
// FP16 version of SaveLoadCombineOp Test
143-
TEST(SaveLoadCombineFP16Op, CPU) {
142+
// FP16 variant of the SaveLoadCombineOp test in which only the save step is
143+
// changed: the tensors are written with the save_as_fp16 attribute enabled.
144+
TEST(SaveCombineFP16Op, CPU) {
144145
paddle::framework::Scope scope;
145146
paddle::platform::CPUPlace place;
146147

@@ -169,7 +170,7 @@ TEST(SaveLoadCombineFP16Op, CPU) {
169170
20, 50, lod4, "test_var4", place, &scope, &expect_lod4);
170171

171172
// Set attributes
172-
std::string filename = "check_tensor_fp16.ls";
173+
std::string filename = "check_tensor_fp16_save.ls";
173174
paddle::framework::AttributeMap attrs;
174175
attrs.insert({"file_path", std::string(filename)});
175176
attrs.insert({"save_as_fp16", true});
@@ -216,6 +217,89 @@ TEST(SaveLoadCombineFP16Op, CPU) {
216217
actual_lod4, numel4);
217218
}
218219

220+
// FP16 variant of the SaveLoadCombineOp test in which only the load step is
221+
// changed: the tensors are read back with the load_as_fp16 attribute enabled.
222+
TEST(LoadCombineFP16Op, CPU) {
223+
paddle::framework::Scope scope;
224+
paddle::platform::CPUPlace place;
225+
226+
std::vector<int> lod1 = {0, 1, 2, 3, 10};
227+
int numel1 = 100;
228+
paddle::framework::LoD expect_lod1;
229+
float* expect1 = CreateForSaveCombineOp<float, paddle::platform::float16>(
230+
10, 10, lod1, "test_var1", place, &scope, &expect_lod1);
231+
232+
std::vector<int> lod2 = {0, 2, 5, 10};
233+
int numel2 = 200;
234+
paddle::framework::LoD expect_lod2;
235+
float* expect2 = CreateForSaveCombineOp<float, paddle::platform::float16>(
236+
10, 20, lod2, "test_var2", place, &scope, &expect_lod2);
237+
238+
std::vector<int> lod3 = {0, 20};
239+
int numel3 = 4000;
240+
paddle::framework::LoD expect_lod3;
241+
float* expect3 = CreateForSaveCombineOp<float, paddle::platform::float16>(
242+
20, 200, lod3, "test_var3", place, &scope, &expect_lod3);
243+
244+
std::vector<int> lod4 = {0, 1, 20};
245+
int numel4 = 1000;
246+
paddle::framework::LoD expect_lod4;
247+
float* expect4 = CreateForSaveCombineOp<float, paddle::platform::float16>(
248+
20, 50, lod4, "test_var4", place, &scope, &expect_lod4);
249+
250+
// Set attributes
251+
std::string filename = "check_tensor_fp16_load.ls";
252+
paddle::framework::AttributeMap attrs;
253+
attrs.insert({"file_path", std::string(filename)});
254+
255+
// Run the save_combine_op
256+
auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
257+
"save_combine",
258+
{{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
259+
save_combine_op->Run(scope, place);
260+
261+
// Set up output vars
262+
auto load_var1 = scope.Var("out_var1");
263+
auto load_var2 = scope.Var("out_var2");
264+
auto load_var3 = scope.Var("out_var3");
265+
auto load_var4 = scope.Var("out_var4");
266+
267+
attrs.insert({"load_as_fp16", true});
268+
// Run the load_combine_op
269+
auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
270+
"load_combine", {},
271+
{{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
272+
load_combine_op->Run(scope, place);
273+
274+
auto* target1 = load_var1->GetMutable<paddle::framework::LoDTensor>();
275+
auto* target2 = load_var2->GetMutable<paddle::framework::LoDTensor>();
276+
auto* target3 = load_var3->GetMutable<paddle::framework::LoDTensor>();
277+
auto* target4 = load_var4->GetMutable<paddle::framework::LoDTensor>();
278+
279+
paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
280+
paddle::platform::float16* actual1 =
281+
GetValuesAfterLoadCombineOp<paddle::platform::float16>(target1, scope,
282+
&actual_lod1);
283+
paddle::platform::float16* actual2 =
284+
GetValuesAfterLoadCombineOp<paddle::platform::float16>(target2, scope,
285+
&actual_lod2);
286+
paddle::platform::float16* actual3 =
287+
GetValuesAfterLoadCombineOp<paddle::platform::float16>(target3, scope,
288+
&actual_lod3);
289+
paddle::platform::float16* actual4 =
290+
GetValuesAfterLoadCombineOp<paddle::platform::float16>(target4, scope,
291+
&actual_lod4);
292+
293+
CheckValues<float, paddle::platform::float16>(expect1, actual1, expect_lod1,
294+
actual_lod1, numel1);
295+
CheckValues<float, paddle::platform::float16>(expect2, actual2, expect_lod2,
296+
actual_lod2, numel2);
297+
CheckValues<float, paddle::platform::float16>(expect3, actual3, expect_lod3,
298+
actual_lod3, numel3);
299+
CheckValues<float, paddle::platform::float16>(expect4, actual4, expect_lod4,
300+
actual_lod4, numel4);
301+
}
302+
219303
// Test with original SaveLoadTest
220304
TEST(SaveLoadTestWithCombineOp, CPU) {
221305
paddle::framework::Scope scope;

0 commit comments

Comments
 (0)