
Commit fd69719

JacobSzwejbka authored and facebook-github-bot committed
add cpp serializer for flat tensor (ptd) (#7841)
Summary: Leverage the flatbuffer builder APIs to generate a .ptd file on device. This will be used by training for checkpointing. No other use cases for generating a .ptd on device exist right now, so I didn't try to make this more extensible by introducing a C++ equivalent of a cord or by anticipating how this might integrate with delegates. If we later add support for delegates owning the weights under ET training, we can revisit this. Differential Revision: D67992901
1 parent 85fbdc8 commit fd69719

6 files changed: +374 -0 lines changed

extension/flat_tensor/serialize/flat_tensor_header.h

Lines changed: 17 additions & 0 deletions

@@ -23,6 +23,8 @@ struct FlatTensorHeader {
   */
  static constexpr size_t kNumHeadBytes = 64;

+  static constexpr uint32_t kVersion = 0;
+
  /**
   * The magic bytes that identify the header. This should be in sync with
   * the magic in executorch/extension/flat_tensor/serialize/serialize.py
@@ -38,6 +40,21 @@
  // @lint-ignore CLANGTIDY facebook-hte-CArray
  static constexpr char kMagic[kMagicSize] = {'F', 'H', '0', '1'};

+  /// The expected length of the header, in bytes.
+  static constexpr uint32_t kHeaderExpectedLength =
+      // Header magic
+      4
+      // Header length
+      + 4
+      // Flatbuffer offset
+      + 8
+      // Flatbuffer data size
+      + 8
+      // Segment base offset
+      + 8
+      // Data size
+      + 8;
+
  /**
   * Look for and parse a FlatTensorHeader in the provided data.
   *
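
For reference, the sum above works out to 40 bytes, and at the 16-byte tensor alignment used in the unit test added by this commit it pads out to 48 bytes before the flatbuffer begins. The following standalone sketch is not part of the commit; it only re-checks that arithmetic at compile time.

// Sketch (not part of the commit): verifies the header-length arithmetic.
#include <cstddef>
#include <cstdint>

int main() {
  // Magic (4) + header length (4) + flatbuffer offset (8) + flatbuffer size (8)
  // + segment base offset (8) + segment data size (8).
  constexpr uint32_t kHeaderExpectedLength = 4 + 4 + 8 + 8 + 8 + 8;
  static_assert(kHeaderExpectedLength == 40, "header payload is 40 bytes");

  // With a 16-byte tensor alignment (as in the unit test below), the header
  // is padded from 40 to 48 bytes before the flatbuffer starts.
  constexpr size_t kAlignment = 16;
  constexpr size_t padded = kHeaderExpectedLength +
      (kAlignment - kHeaderExpectedLength % kAlignment) % kAlignment;
  static_assert(padded == 48, "padded header size at 16-byte alignment");
  return 0;
}
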
extension/flat_tensor/serialize/serialize.cpp

Lines changed: 159 additions & 0 deletions

@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
+#include <executorch/extension/flat_tensor/serialize/schema_generated.h>
+#include <executorch/extension/flat_tensor/serialize/serialize.h>
+#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>
+
+#include <fstream>
+#include <string>
+
+namespace executorch {
+namespace extension {
+namespace flat_tensor {
+
+namespace {
+size_t padding_required(size_t offset, size_t alignment) {
+  // Returns the padding required to align `offset` to `alignment`.
+  size_t remainder = offset % alignment;
+  if (remainder != 0) {
+    return alignment - remainder;
+  }
+  return 0;
+}
+
+size_t aligned_size(size_t input_size, size_t alignment) {
+  // Returns input_size padded up to the next whole multiple of alignment.
+  return input_size + padding_required(input_size, alignment);
+}
+
+void write_nulls(std::ostream& out, size_t num_bytes) {
+  for (size_t i = 0; i < num_bytes; i++) {
+    out.write("\0", 1);
+  }
+}
+} // namespace
+
+runtime::Error save_ptd(
+    const std::string& path,
+    const std::map<std::string, exec_aten::Tensor>& tensor_map,
+    const size_t tensor_alignment) {
+  // Create file.
+  std::ofstream file;
+  file.open(path);
+  runtime::Error e = save_ptd(file, tensor_map, tensor_alignment);
+  file.close();
+  return e;
+}
+
+runtime::Error save_ptd(
+    std::ostream& out,
+    const std::map<std::string, exec_aten::Tensor>& tensor_map,
+    const size_t tensor_alignment) {
+  // Assert the system is little endian. Since we are sending the data over
+  // the wire, we need to ensure that the data is always in the same format.
+  // For now we only support little endian.
+  int n = 1;
+  if (*(char*)&n != 1) {
+    ET_LOG(Error, "Cannot save_ptd on big endian system");
+    return runtime::Error::NotSupported;
+  }
+  // Create flatbuffer.
+  flatbuffers::FlatBufferBuilder builder;
+
+  std::vector<flatbuffers::Offset<::flat_tensor::TensorMetadata>> tensors;
+  std::vector<flatbuffers::Offset<::flat_tensor::DataSegment>> buffers;
+
+  // Write the tensors.
+  size_t total_segment_size = 0;
+  for (const auto& [name, tensor] : tensor_map) {
+    auto name_offset = builder.CreateString(name);
+    // Write the tensor metadata.
+    auto tensor_metadata = ::flat_tensor::CreateTensorMetadata(
+        builder,
+        name_offset,
+        static_cast<executorch_flatbuffer::ScalarType>(tensor.scalar_type()),
+        builder.CreateVector(tensor.sizes().data(), tensor.sizes().size()),
+        builder.CreateVector(
+            tensor.dim_order().data(), tensor.dim_order().size()),
+        0, // segment index
+        total_segment_size);
+    tensors.push_back(
+        tensor_metadata); // Precalculate the size of the data blob
+    total_segment_size += aligned_size(tensor.nbytes(), tensor_alignment);
+  }
+  // Only have one segment.
+  buffers.push_back(
+      ::flat_tensor::CreateDataSegment(builder, 0, total_segment_size));
+
+  auto flat_tensor = CreateFlatTensor(
+      builder,
+      FlatTensorHeader::kVersion,
+      tensor_alignment,
+      builder.CreateVector(tensors),
+      builder.CreateVector(buffers));
+  builder.Finish(flat_tensor); // Our flatbuffer is created now.
+
+  // Calculate flatbuffer padding.
+  auto padded_flatbufer_size =
+      aligned_size(builder.GetSize(), tensor_alignment);
+  auto padded_header_size =
+      aligned_size(FlatTensorHeader::kHeaderExpectedLength, tensor_alignment);
+
+  // Write header.
+  out.write(FlatTensorHeader::kMagic, sizeof(FlatTensorHeader::kMagic));
+  out.write(
+      reinterpret_cast<const char*>(&FlatTensorHeader::kHeaderExpectedLength),
+      sizeof(FlatTensorHeader::kHeaderExpectedLength));
+
+  FlatTensorHeader header = {
+      padded_header_size, // Offset to flatbuffer
+      builder.GetSize(), // Flatbuffer size
+      padded_header_size + padded_flatbufer_size, // Offset to segments
+      total_segment_size // Segment data size
+  };
+
+  out.write(
+      reinterpret_cast<const char*>(&header.flatbuffer_offset),
+      sizeof(header.flatbuffer_offset));
+  out.write(
+      reinterpret_cast<const char*>(&header.flatbuffer_size),
+      sizeof(header.flatbuffer_size));
+  out.write(
+      reinterpret_cast<const char*>(&header.segment_base_offset),
+      sizeof(header.segment_base_offset));
+  out.write(
+      reinterpret_cast<const char*>(&header.segment_data_size),
+      sizeof(header.segment_data_size));
+
+  // Write header padding.
+  write_nulls(
+      out,
+      padding_required(FlatTensorHeader::kHeaderExpectedLength, tensor_alignment));
+
+  // Write flatbuffer.
+  out.write(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize());
+
+  // Write flatbuffer padding.
+  write_nulls(out, padding_required(builder.GetSize(), tensor_alignment));
+
+  // Write segment: buffers + tensor padding.
+  for (const auto& [name, tensor] : tensor_map) {
+    out.write(
+        reinterpret_cast<const char*>(tensor.data_ptr()), tensor.nbytes());
+    write_nulls(out, padding_required(tensor.nbytes(), tensor_alignment));
+  }
+  return runtime::Error::Ok;
+}
+
+} // namespace flat_tensor
+} // namespace extension
+} // namespace executorch
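
The resulting .ptd layout is: header, header padding, flatbuffer, flatbuffer padding, then a single segment of aligned tensor data. The illustration below is not part of the commit; it re-implements the file's internal alignment helpers to show how the header offsets are derived, using the 224-byte flatbuffer size observed in the unit test below as an example value.

// Illustration only: reproduces the offset math save_ptd writes into the header.
#include <cstddef>
#include <cstdio>

static size_t padding_required(size_t offset, size_t alignment) {
  size_t remainder = offset % alignment;
  return remainder == 0 ? 0 : alignment - remainder;
}

static size_t aligned_size(size_t input_size, size_t alignment) {
  return input_size + padding_required(input_size, alignment);
}

int main() {
  const size_t kHeaderLength = 40;    // FlatTensorHeader::kHeaderExpectedLength
  const size_t kAlignment = 16;       // tensor_alignment passed by the caller
  const size_t flatbuffer_size = 224; // example value taken from the unit test

  size_t flatbuffer_offset = aligned_size(kHeaderLength, kAlignment); // 48
  size_t segment_base_offset =
      flatbuffer_offset + aligned_size(flatbuffer_size, kAlignment);  // 48 + 224 = 272
  std::printf(
      "flatbuffer at %zu, segments at %zu\n",
      flatbuffer_offset,
      segment_base_offset);
  return 0;
}
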
extension/flat_tensor/serialize/serialize.h

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+
+#include <map>
+#include <string>
+
+namespace executorch {
+namespace extension {
+namespace flat_tensor {
+
+/**
+ * Creates a .ptd from the given tensor map.
+ *
+ * @param path The file path to save the .ptd to.
+ * @param tensor_map The map of tensor names to tensors to save.
+ * @param tensor_alignment The bytes tensor data should be aligned to.
+ * @return An error if the data could not be saved. Error::Ok for success.
+ */
+ET_EXPERIMENTAL runtime::Error save_ptd(
+    const std::string& path,
+    const std::map<std::string, exec_aten::Tensor>& tensor_map,
+    const size_t tensor_alignment);
+
+/**
+ * Creates a .ptd from the given tensor map.
+ *
+ * @param out The stream to write the .ptd data to.
+ * @param tensor_map The map of tensor names to tensors to save.
+ * @param tensor_alignment The bytes tensor data should be aligned to.
+ * @return An error if the data could not be saved. Error::Ok for success.
+ */
+ET_EXPERIMENTAL runtime::Error save_ptd(
+    std::ostream& out,
+    const std::map<std::string, exec_aten::Tensor>& tensor_map,
+    const size_t tensor_alignment);
+
+} // namespace flat_tensor
+} // namespace extension
+} // namespace executorch
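
A minimal usage sketch of the file-path overload follows. It is not part of the commit; it mirrors the unit test below, and the output path is hypothetical.

// Usage sketch (not part of the commit): write a small checkpoint to disk.
#include <executorch/extension/flat_tensor/serialize/serialize.h>
#include <executorch/extension/tensor/tensor_ptr.h>

#include <map>
#include <string>

int main() {
  float weight = 3.14f;
  float bias = 2.0f;
  auto weight_tensor = executorch::extension::make_tensor_ptr({1}, &weight);
  auto bias_tensor = executorch::extension::make_tensor_ptr({1}, &bias);

  std::map<std::string, exec_aten::Tensor> tensor_map;
  tensor_map.insert({"linear.weight", *weight_tensor.get()});
  tensor_map.insert({"linear.bias", *bias_tensor.get()});

  // Write a .ptd with tensor data aligned to 16 bytes.
  auto err = executorch::extension::flat_tensor::save_ptd(
      "/tmp/linear.ptd", tensor_map, 16);
  return err == executorch::runtime::Error::Ok ? 0 : 1;
}
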

extension/flat_tensor/serialize/targets.bzl

Lines changed: 15 additions & 0 deletions

@@ -42,3 +42,18 @@ def define_common_targets():
         visibility = ["//executorch/..."],
         exported_deps = ["//executorch/runtime/core:core"],
     )
+
+    runtime.cxx_library(
+        name = "serialize_cpp",
+        srcs = ["serialize.cpp"],
+        deps = [
+            ":flat_tensor_header",
+            ":generated_headers",
+            "//executorch/runtime/core/exec_aten:lib",
+        ],
+        exported_headers = ["serialize.h"],
+        visibility = [
+            "//executorch/...",
+        ],
+        exported_external_deps = ["flatbuffers-api"],
+    )

extension/flat_tensor/test/targets.bzl

Lines changed: 21 additions & 0 deletions

@@ -16,3 +16,24 @@ def define_common_targets():
             "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
         ],
     )
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Defines targets that should be shared between fbcode and xplat.
+
+    The directory containing this targets.bzl file should also contain both
+    TARGETS and BUCK files that call this function.
+    """
+
+    runtime.cxx_test(
+        name = "serialize_cpp_test",
+        srcs = [
+            "test_serialize.cpp",
+        ],
+        deps = [
+            "//executorch/extension/flat_tensor/serialize:serialize_cpp",
+            "//executorch/extension/flat_tensor/serialize:generated_headers",
+            "//executorch/extension/flat_tensor/serialize:flat_tensor_header",
+            "//executorch/extension/tensor:tensor",
+        ],
+    )
extension/flat_tensor/test/test_serialize.cpp

Lines changed: 114 additions & 0 deletions

@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
+#include <executorch/extension/flat_tensor/serialize/schema_generated.h>
+#include <executorch/extension/flat_tensor/serialize/serialize.h>
+#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>
+
+#include <gtest/gtest.h>
+
+#include <executorch/extension/tensor/tensor_ptr.h>
+#include <executorch/runtime/core/result.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <map>
+
+#include <sstream>
+
+using namespace ::testing;
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+
+class FlatTensorSerializeTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Since these tests cause ET_LOG to be called, the PAL must be initialized
+    // first.
+    executorch::runtime::runtime_init();
+  }
+};
+
+TEST_F(FlatTensorSerializeTest, ValidFlatTensorSerialized) {
+  std::map<std::string, exec_aten::Tensor> flat_tensor_map;
+
+  float linear_weight = 3.14f;
+  auto weight = executorch::extension::make_tensor_ptr({1}, &linear_weight);
+
+  float linear_bias = 2.0f;
+  auto bias = executorch::extension::make_tensor_ptr({1}, &linear_bias);
+
+  flat_tensor_map.insert({"linear.weight", *weight.get()});
+  flat_tensor_map.insert({"linear.bias", *bias.get()});
+
+  std::ostringstream buf;
+  auto err =
+      executorch::extension::flat_tensor::save_ptd(buf, flat_tensor_map, 16);
+  ASSERT_EQ(err, Error::Ok);
+  auto x = buf.str();
+  const char* byte_buffer = x.c_str();
+
+  // Check Magic
+  ASSERT_EQ(byte_buffer[0], 'F');
+  ASSERT_EQ(byte_buffer[1], 'H');
+  ASSERT_EQ(byte_buffer[2], '0');
+  ASSERT_EQ(byte_buffer[3], '1');
+
+  // Check Header
+  ASSERT_EQ(
+      *(uint32_t*)(byte_buffer + 4),
+      executorch::extension::FlatTensorHeader::kHeaderExpectedLength);
+  ASSERT_EQ(
+      *(uint64_t*)(byte_buffer + 8),
+      48); // Header is 40 bytes + 8 bytes of padding today, and then the
+           // flatbuffer starts.
+  ASSERT_EQ(
+      *(uint64_t*)(byte_buffer + 16),
+      224); // This is fragile, and depends on the schema, the builder, and the
+            // padding needed.
+  const uint64_t segment_offset = 48 +
+      224; // Segment offset, depends on the padded header and flatbuffer sizes.
+  ASSERT_EQ(*(uint64_t*)(byte_buffer + 24), segment_offset);
+  ASSERT_EQ(
+      *(uint64_t*)(byte_buffer + 32),
+      32); // Segment total size, 8 bytes of data (2 floats), 24 bytes of
+           // padding.
+
+  // Check Flatbuffer
+  auto flat_tensor = ::flat_tensor::GetFlatTensor(byte_buffer + 48);
+
+  ASSERT_EQ(
+      flat_tensor->version(),
+      executorch::extension::FlatTensorHeader::kVersion);
+  ASSERT_EQ(flat_tensor->tensor_alignment(), 16);
+  ASSERT_EQ(flat_tensor->tensors()->size(), 2);
+  ASSERT_EQ(flat_tensor->segments()->size(), 1);
+
+  auto tensor0 = flat_tensor->tensors()->Get(0);
+  ASSERT_EQ(strcmp(tensor0->fully_qualified_name()->c_str(), "linear.bias"), 0);
+  ASSERT_EQ(tensor0->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
+  ASSERT_EQ(tensor0->sizes()->size(), 1);
+  ASSERT_EQ(tensor0->segment_index(), 0);
+  ASSERT_EQ(tensor0->offset(), 0);
+
+  auto tensor1 = flat_tensor->tensors()->Get(1);
+  ASSERT_EQ(
+      strcmp(tensor1->fully_qualified_name()->c_str(), "linear.weight"), 0);
+  ASSERT_EQ(tensor1->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
+  ASSERT_EQ(tensor1->sizes()->size(), 1);
+  ASSERT_EQ(tensor1->segment_index(), 0);
+  ASSERT_EQ(tensor1->offset(), 16);
+
+  // Test Segments
+  auto segment = flat_tensor->segments()->Get(0);
+
+  ASSERT_EQ(segment->offset(), 0);
+  ASSERT_EQ(segment->size(), 32);
+  uint8_t* data = (uint8_t*)(byte_buffer + segment_offset);
+  ASSERT_EQ(*(float*)(data + 0), linear_bias);
+  ASSERT_EQ(*(float*)(data + 16), linear_weight);
+}
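
The expected segment values in this test follow from aligning each 4-byte float to 16 bytes, with "linear.bias" ordered before "linear.weight" because std::map sorts keys lexicographically. A short sketch (not part of the commit) recomputes them:

// Sketch (not part of the commit): recomputes the segment-layout constants the
// test asserts, assuming two 4-byte float tensors and 16-byte alignment.
#include <cstddef>
#include <cstdio>

static size_t aligned_size(size_t n, size_t alignment) {
  size_t rem = n % alignment;
  return rem == 0 ? n : n + (alignment - rem);
}

int main() {
  const size_t kAlignment = 16;
  const size_t kFloatBytes = 4;

  size_t bias_offset = 0;                                        // first tensor in the segment
  size_t weight_offset = aligned_size(kFloatBytes, kAlignment);  // 16
  size_t segment_size =
      weight_offset + aligned_size(kFloatBytes, kAlignment);     // 32

  std::printf(
      "offsets: %zu, %zu; segment size: %zu\n",
      bias_offset,
      weight_offset,
      segment_size);
  return 0;
}
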
