
Commit 376e7f8

JacobSzwejbka authored and facebook-github-bot committed

add cpp serializer for flat tensor (ptd)
Summary: Leverage the flatbuffer builder APIs to generate a .ptd on device. This will be used by training for checkpointing. No other use cases really exist for generating a .ptd on device right now, so I didn't worry about making this more easily extensible by coming up with a C++ equivalent of a cord, or by trying to think ahead to how this might integrate with delegates. Later, if we add support for delegates owning the weights under ET training, we can revisit this.

Differential Revision: D67992901
1 parent 6fe6870 commit 376e7f8

File tree

6 files changed: +341 −0 lines changed
extension/flat_tensor/serialize/serialize.cpp

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/extension/flat_tensor/serialize/serialize.h>

#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
#include <executorch/extension/flat_tensor/serialize/schema_generated.h>

#include <fstream>
#include <string>

namespace executorch {
namespace extension {
namespace flat_tensor {

namespace {
// Returns the padding required to align `offset` to `alignment`.
size_t padding_required(size_t offset, size_t alignment) {
  size_t remainder = offset % alignment;
  if (remainder != 0) {
    return alignment - remainder;
  }
  return 0;
}

// Returns input_size padded up to the next whole multiple of alignment.
size_t aligned_size(size_t input_size, size_t alignment) {
  return input_size + padding_required(input_size, alignment);
}

// Writes `num_bytes` zero bytes to `out`; used for padding.
void serialize_nulls(std::ostream& out, size_t num_bytes) {
  for (size_t i = 0; i < num_bytes; i++) {
    out.write("\0", 1);
  }
}
} // namespace

ET_EXPERIMENTAL runtime::Error save_ptd(
    const std::string& path,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment) {
  // Assert the system is little endian.
  int n = 1;
  if (*(char*)&n != 1) {
    return runtime::Error::NotSupported;
  }
  // Create the file. Open in binary mode so no newline translation occurs.
  std::ofstream file;
  file.open(path, std::ios::binary);
  runtime::Error e = save_ptd(file, tensor_map, tensor_alignment);
  file.close();
  return e;
}

ET_EXPERIMENTAL runtime::Error save_ptd(
    std::ostream& out,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment) {
  // Assert the system is little endian.
  int n = 1;
  if (*(char*)&n != 1) {
    return runtime::Error::NotSupported;
  }
  // Create the flatbuffer.
  flatbuffers::FlatBufferBuilder builder;

  std::vector<flatbuffers::Offset<::flat_tensor::TensorMetadata>> tensors;
  std::vector<flatbuffers::Offset<::flat_tensor::DataSegment>> buffers;

  // Serialize the tensor metadata and precalculate the size of the data blob.
  size_t total_segment_size = 0;
  for (const auto& [name, tensor] : tensor_map) {
    auto name_offset = builder.CreateString(name);
    // Serialize the tensor metadata.
    auto tensor_metadata = ::flat_tensor::CreateTensorMetadata(
        builder,
        name_offset,
        static_cast<executorch_flatbuffer::ScalarType>(tensor.scalar_type()),
        builder.CreateVector(tensor.sizes().data(), tensor.sizes().size()),
        builder.CreateVector(
            tensor.dim_order().data(), tensor.dim_order().size()),
        0, // segment index
        total_segment_size); // offset of this tensor within the segment
    tensors.push_back(tensor_metadata);
    total_segment_size += aligned_size(tensor.nbytes(), tensor_alignment);
  }
  // Only have one segment.
  buffers.push_back(
      ::flat_tensor::CreateDataSegment(builder, 0, total_segment_size));

  auto flat_tensor = CreateFlatTensor(
      builder,
      internal::VERSION,
      tensor_alignment,
      builder.CreateVector(tensors),
      builder.CreateVector(buffers));
  builder.Finish(flat_tensor); // Our flatbuffer is created now.

  // Calculate flatbuffer padding.
  auto padded_flatbuffer_size = aligned_size(builder.GetSize(), tensor_alignment);
  auto padded_header_size =
      aligned_size(internal::HEADER_EXPECTED_LENGTH, tensor_alignment);

  // Serialize the header.
  out.write(internal::HEADER_MAGIC, sizeof(internal::HEADER_MAGIC));
  out.write(
      reinterpret_cast<const char*>(&internal::HEADER_EXPECTED_LENGTH),
      sizeof(internal::HEADER_EXPECTED_LENGTH));

  internal::FlatTensorHeader header = {
      padded_header_size, // offset to the flatbuffer
      builder.GetSize(), // flatbuffer size
      padded_header_size + padded_flatbuffer_size, // offset to the segments
      total_segment_size // segment data size
  };

  out.write(
      reinterpret_cast<const char*>(&header.flatbuffer_offset),
      sizeof(header.flatbuffer_offset));
  out.write(
      reinterpret_cast<const char*>(&header.flatbuffer_size),
      sizeof(header.flatbuffer_size));
  out.write(
      reinterpret_cast<const char*>(&header.segment_base_offset),
      sizeof(header.segment_base_offset));
  out.write(
      reinterpret_cast<const char*>(&header.segment_data_size),
      sizeof(header.segment_data_size));

  // Serialize the header padding.
  serialize_nulls(
      out, padding_required(internal::HEADER_EXPECTED_LENGTH, tensor_alignment));

  // Serialize the flatbuffer.
  out.write(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  // Serialize the flatbuffer padding.
  serialize_nulls(out, padding_required(builder.GetSize(), tensor_alignment));

  // Serialize the segment: each tensor's data followed by its padding.
  for (const auto& [name, tensor] : tensor_map) {
    out.write(reinterpret_cast<const char*>(tensor.data_ptr()), tensor.nbytes());
    serialize_nulls(out, padding_required(tensor.nbytes(), tensor_alignment));
  }
  return runtime::Error::Ok;
}

} // namespace flat_tensor
} // namespace extension
} // namespace executorch
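To make the resulting file layout concrete, here is a small standalone sketch (not part of the commit) that recomputes the writer's offsets using the same alignment arithmetic as the helpers above; the 120-byte flatbuffer size is a made-up placeholder, since the real size comes from builder.GetSize().

#include <cstddef>
#include <cstdio>

// Mirrors padding_required()/aligned_size() from serialize.cpp.
size_t aligned(size_t n, size_t a) {
  return n + (n % a ? a - n % a : 0);
}

int main() {
  const size_t alignment = 16; // tensor_alignment
  const size_t header_len = 40; // HEADER_EXPECTED_LENGTH: 4+4+8+8+8+8
  const size_t flatbuffer_size = 120; // hypothetical builder.GetSize()

  // Layout: [header | pad][flatbuffer | pad][tensor 0 | pad][tensor 1 | pad]
  const size_t flatbuffer_offset = aligned(header_len, alignment); // 48
  const size_t segment_base_offset =
      flatbuffer_offset + aligned(flatbuffer_size, alignment); // 48 + 128
  const size_t segment_data_size =
      2 * aligned(sizeof(float), alignment); // two 4-byte floats -> 32

  std::printf(
      "flatbuffer at %zu, segments at %zu, segment bytes %zu\n",
      flatbuffer_offset,
      segment_base_offset,
      segment_data_size);
  return 0;
}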
extension/flat_tensor/serialize/serialize.h

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>

#include <cstdint>
#include <map>
#include <ostream>
#include <string>

namespace executorch {
namespace extension {
namespace flat_tensor {
namespace internal {
// C++ mirror of the Python definitions in serialize.py.

// Class constants.
const uint32_t VERSION = 0;
// The magic bytes that should be at the beginning of the header.
const char HEADER_MAGIC[4] = {'F', 'H', '0', '1'};
const uint32_t HEADER_EXPECTED_LENGTH = (
    // Header magic
    4
    // Header length
    + 4
    // Flatbuffer offset
    + 8
    // Flatbuffer data size
    + 8
    // Segment base offset
    + 8
    // Data size
    + 8);

struct FlatTensorHeader {
  // Offset to the start of the flatbuffer data, in bytes.
  uint64_t flatbuffer_offset;
  // The size of the serialized flatbuffer data, in bytes.
  uint64_t flatbuffer_size;
  // Offset to the start of the first segment, or zero if there
  // are no segments.
  uint64_t segment_base_offset;
  // Size of all the segment data, in bytes.
  uint64_t segment_data_size;
};
} // namespace internal

runtime::Error save_ptd(
    const std::string& path,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment);

runtime::Error save_ptd(
    std::ostream& out,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment);

} // namespace flat_tensor
} // namespace extension
} // namespace executorch
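For orientation, here is a minimal caller sketch (not part of the commit) showing how a training loop might checkpoint two parameters with this API; the tensor names and values are illustrative, and it uses the tensor extension's make_tensor_ptr the same way the test below does.

#include <executorch/extension/flat_tensor/serialize/serialize.h>
#include <executorch/extension/tensor/tensor_ptr.h>

#include <map>
#include <string>

int main() {
  // Hypothetical checkpoint data; any exec_aten::Tensor works here.
  float weight_data = 1.5f;
  float bias_data = 0.25f;
  auto weight = executorch::extension::make_tensor_ptr({1}, &weight_data);
  auto bias = executorch::extension::make_tensor_ptr({1}, &bias_data);

  std::map<std::string, exec_aten::Tensor> tensor_map;
  tensor_map.insert({"linear.weight", *weight.get()});
  tensor_map.insert({"linear.bias", *bias.get()});

  // Write a .ptd with tensor data aligned to 16 bytes.
  auto err = executorch::extension::flat_tensor::save_ptd(
      "checkpoint.ptd", tensor_map, 16);
  return err == executorch::runtime::Error::Ok ? 0 : 1;
}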

extension/flat_tensor/serialize/targets.bzl

Lines changed: 11 additions & 0 deletions
@@ -34,3 +34,14 @@ def define_common_targets():
        },
        exported_external_deps = ["flatbuffers-api"],
    )

    runtime.cxx_library(
        name = "serialize_cpp",
        srcs = ["serialize.cpp"],
        deps = [":generated_headers", "//executorch/runtime/core/exec_aten:lib"],
        exported_headers = ["serialize.h"],
        visibility = [
            "//executorch/...",
        ],
        exported_external_deps = ["flatbuffers-api"],
    )

extension/flat_tensor/test/TARGETS

Lines changed: 6 additions & 0 deletions
@@ -1,7 +1,13 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()

python_unittest(
    name = "serialize",
    srcs = [
extension/flat_tensor/test/targets.bzl

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.

    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    runtime.cxx_test(
        name = "serialize_cpp_test",
        srcs = [
            "test_serialize.cpp",
        ],
        deps = [
            "//executorch/extension/flat_tensor/serialize:serialize_cpp",
            "//executorch/extension/flat_tensor/serialize:generated_headers",
            "//executorch/extension/tensor:tensor",
        ],
    )
extension/flat_tensor/test/test_serialize.cpp

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/extension/flat_tensor/serialize/serialize.h>

#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
#include <executorch/extension/flat_tensor/serialize/schema_generated.h>

#include <gtest/gtest.h>

#include <executorch/extension/tensor/tensor_ptr.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>

#include <cstring>
#include <map>
#include <sstream>

using namespace ::testing;
using executorch::runtime::Error;
using executorch::runtime::Result;

class FlatTensorSerializeTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Since these tests cause ET_LOG to be called, the PAL must be
    // initialized first.
    executorch::runtime::runtime_init();
  }
};

TEST_F(FlatTensorSerializeTest, ValidFlatTensorSerialized) {
  std::map<std::string, exec_aten::Tensor> flat_tensor_map;

  float linear_weight = 3.14f;
  auto weight = executorch::extension::make_tensor_ptr({1}, &linear_weight);

  float linear_bias = 2.0f;
  auto bias = executorch::extension::make_tensor_ptr({1}, &linear_bias);

  flat_tensor_map.insert({"linear.weight", *weight.get()});
  flat_tensor_map.insert({"linear.bias", *bias.get()});

  std::ostringstream buf;
  auto err =
      executorch::extension::flat_tensor::save_ptd(buf, flat_tensor_map, 16);
  ASSERT_EQ(err, Error::Ok);
  auto x = buf.str();
  const char* byte_buffer = x.c_str();

  // Check the magic.
  ASSERT_EQ(byte_buffer[0], 'F');
  ASSERT_EQ(byte_buffer[1], 'H');
  ASSERT_EQ(byte_buffer[2], '0');
  ASSERT_EQ(byte_buffer[3], '1');

  // Check the header.
  ASSERT_EQ(
      *(uint32_t*)(byte_buffer + 4),
      executorch::extension::flat_tensor::internal::HEADER_EXPECTED_LENGTH);
  // The header is 40 bytes + 8 bytes of padding today, and then the
  // flatbuffer starts.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 8), 48);
  // This is fragile, and depends on the schema, the builder, and the padding
  // needed.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 16), 224);
  // Segment offset, depends on the padded header and flatbuffer sizes.
  const uint64_t segment_offset = 48 + 224;
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 24), segment_offset);
  // Segment total size: 8 bytes of data (2 floats), 24 bytes of padding.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 32), 32);

  // Check the flatbuffer.
  auto flat_tensor = ::flat_tensor::GetFlatTensor(byte_buffer + 48);

  ASSERT_EQ(
      flat_tensor->version(),
      executorch::extension::flat_tensor::internal::VERSION);
  ASSERT_EQ(flat_tensor->tensor_alignment(), 16);
  ASSERT_EQ(flat_tensor->tensors()->size(), 2);
  ASSERT_EQ(flat_tensor->segments()->size(), 1);

  // std::map is sorted by key, so "linear.bias" is serialized first.
  auto tensor0 = flat_tensor->tensors()->Get(0);
  ASSERT_EQ(strcmp(tensor0->fully_qualified_name()->c_str(), "linear.bias"), 0);
  ASSERT_EQ(tensor0->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
  ASSERT_EQ(tensor0->sizes()->size(), 1);
  ASSERT_EQ(tensor0->segment_index(), 0);
  ASSERT_EQ(tensor0->offset(), 0);

  auto tensor1 = flat_tensor->tensors()->Get(1);
  ASSERT_EQ(
      strcmp(tensor1->fully_qualified_name()->c_str(), "linear.weight"), 0);
  ASSERT_EQ(tensor1->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
  ASSERT_EQ(tensor1->sizes()->size(), 1);
  ASSERT_EQ(tensor1->segment_index(), 0);
  ASSERT_EQ(tensor1->offset(), 16);

  // Test the segments.
  auto segment = flat_tensor->segments()->Get(0);

  ASSERT_EQ(segment->offset(), 0);
  ASSERT_EQ(segment->size(), 32);
  uint8_t* data = (uint8_t*)(byte_buffer + segment_offset);
  ASSERT_EQ(*(float*)(data + 0), linear_bias);
  ASSERT_EQ(*(float*)(data + 16), linear_weight);
}
