
Commit 376e7f8

JacobSzwejbka authored and facebook-github-bot committed

add cpp serializer for flat tensor (ptd)
Summary: Leverage the flatbuffer builder APIs to generate a .ptd on device. This will be used by training for checkpointing. No other use cases really exist for generating a .ptd on device right now, so I didn't worry about making this more easily extensible by coming up with a C++ equivalent of a cord, or by trying to think ahead to how this might integrate with delegates. Later, if we add support for delegates owning the weights under ET training, we can revisit this.

Differential Revision: D67992901
1 parent 6fe6870 commit 376e7f8

File tree

6 files changed: +341 −0 lines changed
extension/flat_tensor/serialize/serialize.cpp

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/extension/flat_tensor/serialize/serialize.h>

#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
#include <executorch/extension/flat_tensor/serialize/schema_generated.h>

#include <fstream>
#include <string>

namespace executorch {
namespace extension {
namespace flat_tensor {

namespace {
// Returns the padding required to align `offset` to `alignment`.
size_t padding_required(size_t offset, size_t alignment) {
  size_t remainder = offset % alignment;
  if (remainder != 0) {
    return alignment - remainder;
  }
  return 0;
}

// Returns input_size padded up to the next whole multiple of alignment.
size_t aligned_size(size_t input_size, size_t alignment) {
  return input_size + padding_required(input_size, alignment);
}

// Writes `num_bytes` zero bytes to `out`; used for padding.
void serialize_nulls(std::ostream& out, size_t num_bytes) {
  for (size_t i = 0; i < num_bytes; i++) {
    out.write("\0", 1);
  }
}
} // namespace

ET_EXPERIMENTAL runtime::Error save_ptd(
    const std::string& path,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment) {
  // Assert the system is little endian.
  int n = 1;
  if (*(char*)&n != 1) {
    return runtime::Error::NotSupported;
  }
  // Create the file. Open in binary mode so no newline translation occurs.
  std::ofstream file;
  file.open(path, std::ios::binary);
  runtime::Error e = save_ptd(file, tensor_map, tensor_alignment);
  file.close();
  return e;
}

ET_EXPERIMENTAL runtime::Error save_ptd(
    std::ostream& out,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment) {
  // Assert the system is little endian.
  int n = 1;
  if (*(char*)&n != 1) {
    return runtime::Error::NotSupported;
  }
  // Create the flatbuffer.
  flatbuffers::FlatBufferBuilder builder;

  std::vector<flatbuffers::Offset<::flat_tensor::TensorMetadata>> tensors;
  std::vector<flatbuffers::Offset<::flat_tensor::DataSegment>> buffers;

  // Serialize the tensor metadata and precalculate the size of the data blob.
  size_t total_segment_size = 0;
  for (const auto& [name, tensor] : tensor_map) {
    auto name_offset = builder.CreateString(name);
    // Serialize the tensor metadata.
    auto tensor_metadata = ::flat_tensor::CreateTensorMetadata(
        builder,
        name_offset,
        static_cast<executorch_flatbuffer::ScalarType>(tensor.scalar_type()),
        builder.CreateVector(tensor.sizes().data(), tensor.sizes().size()),
        builder.CreateVector(
            tensor.dim_order().data(), tensor.dim_order().size()),
        0, // segment index
        total_segment_size); // offset of this tensor within the segment
    tensors.push_back(tensor_metadata);
    total_segment_size += aligned_size(tensor.nbytes(), tensor_alignment);
  }
  // Only have one segment.
  buffers.push_back(
      ::flat_tensor::CreateDataSegment(builder, 0, total_segment_size));

  auto flat_tensor = CreateFlatTensor(
      builder,
      internal::VERSION,
      tensor_alignment,
      builder.CreateVector(tensors),
      builder.CreateVector(buffers));
  builder.Finish(flat_tensor); // Our flatbuffer is created now.

  // Calculate flatbuffer padding.
  auto padded_flatbuffer_size = aligned_size(builder.GetSize(), tensor_alignment);
  auto padded_header_size =
      aligned_size(internal::HEADER_EXPECTED_LENGTH, tensor_alignment);

  // Serialize the header.
  out.write(internal::HEADER_MAGIC, sizeof(internal::HEADER_MAGIC));
  out.write(
      reinterpret_cast<const char*>(&internal::HEADER_EXPECTED_LENGTH),
      sizeof(internal::HEADER_EXPECTED_LENGTH));

  internal::FlatTensorHeader header = {
      padded_header_size, // offset to the flatbuffer
      builder.GetSize(), // flatbuffer size
      padded_header_size + padded_flatbuffer_size, // offset to the segments
      total_segment_size // segment data size
  };

  out.write(
      reinterpret_cast<const char*>(&header.flatbuffer_offset),
      sizeof(header.flatbuffer_offset));
  out.write(
      reinterpret_cast<const char*>(&header.flatbuffer_size),
      sizeof(header.flatbuffer_size));
  out.write(
      reinterpret_cast<const char*>(&header.segment_base_offset),
      sizeof(header.segment_base_offset));
  out.write(
      reinterpret_cast<const char*>(&header.segment_data_size),
      sizeof(header.segment_data_size));

  // Serialize the header padding.
  serialize_nulls(
      out, padding_required(internal::HEADER_EXPECTED_LENGTH, tensor_alignment));

  // Serialize the flatbuffer.
  out.write(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  // Serialize the flatbuffer padding.
  serialize_nulls(out, padding_required(builder.GetSize(), tensor_alignment));

  // Serialize the segment: each tensor's data followed by its padding.
  for (const auto& [name, tensor] : tensor_map) {
    out.write(reinterpret_cast<const char*>(tensor.data_ptr()), tensor.nbytes());
    serialize_nulls(out, padding_required(tensor.nbytes(), tensor_alignment));
  }
  return runtime::Error::Ok;
}

} // namespace flat_tensor
} // namespace extension
} // namespace executorch
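To make the resulting file layout concrete, here is a small standalone sketch (not part of the commit) that recomputes the writer's offsets using the same alignment arithmetic as the helpers above; the 120-byte flatbuffer size is a made-up placeholder, since the real size comes from builder.GetSize().

#include <cstddef>
#include <cstdio>

// Mirrors padding_required()/aligned_size() from serialize.cpp.
size_t aligned(size_t n, size_t a) {
  return n + (n % a ? a - n % a : 0);
}

int main() {
  const size_t alignment = 16; // tensor_alignment
  const size_t header_len = 40; // HEADER_EXPECTED_LENGTH: 4+4+8+8+8+8
  const size_t flatbuffer_size = 120; // hypothetical builder.GetSize()

  // Layout: [header | pad][flatbuffer | pad][tensor 0 | pad][tensor 1 | pad]
  const size_t flatbuffer_offset = aligned(header_len, alignment); // 48
  const size_t segment_base_offset =
      flatbuffer_offset + aligned(flatbuffer_size, alignment); // 48 + 128
  const size_t segment_data_size =
      2 * aligned(sizeof(float), alignment); // two 4-byte floats -> 32

  std::printf(
      "flatbuffer at %zu, segments at %zu, segment bytes %zu\n",
      flatbuffer_offset,
      segment_base_offset,
      segment_data_size);
  return 0;
}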
extension/flat_tensor/serialize/serialize.h

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>

#include <cstdint>
#include <map>
#include <ostream>
#include <string>

namespace executorch {
namespace extension {
namespace flat_tensor {
namespace internal {
// C++ mirror of the Python definitions in serialize.py.

// Class constants.
const uint32_t VERSION = 0;
// The magic bytes that should be at the beginning of the header.
const char HEADER_MAGIC[4] = {'F', 'H', '0', '1'};
const uint32_t HEADER_EXPECTED_LENGTH = (
    // Header magic
    4
    // Header length
    + 4
    // Flatbuffer offset
    + 8
    // Flatbuffer data size
    + 8
    // Segment base offset
    + 8
    // Data size
    + 8);

struct FlatTensorHeader {
  // Offset to the start of the flatbuffer data, in bytes.
  uint64_t flatbuffer_offset;
  // The size of the serialized flatbuffer data, in bytes.
  uint64_t flatbuffer_size;
  // Offset to the start of the first segment, or zero if there
  // are no segments.
  uint64_t segment_base_offset;
  // Size of all the segment data, in bytes.
  uint64_t segment_data_size;
};
} // namespace internal

runtime::Error save_ptd(
    const std::string& path,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment);

runtime::Error save_ptd(
    std::ostream& out,
    const std::map<std::string, exec_aten::Tensor>& tensor_map,
    const size_t tensor_alignment);

} // namespace flat_tensor
} // namespace extension
} // namespace executorch
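For orientation, here is a minimal caller sketch (not part of the commit) showing how a training loop might checkpoint two parameters with this API; the tensor names and values are illustrative, and it uses the tensor extension's make_tensor_ptr the same way the test below does.

#include <executorch/extension/flat_tensor/serialize/serialize.h>
#include <executorch/extension/tensor/tensor_ptr.h>

#include <map>
#include <string>

int main() {
  // Hypothetical checkpoint data; any exec_aten::Tensor works here.
  float weight_data = 1.5f;
  float bias_data = 0.25f;
  auto weight = executorch::extension::make_tensor_ptr({1}, &weight_data);
  auto bias = executorch::extension::make_tensor_ptr({1}, &bias_data);

  std::map<std::string, exec_aten::Tensor> tensor_map;
  tensor_map.insert({"linear.weight", *weight.get()});
  tensor_map.insert({"linear.bias", *bias.get()});

  // Write a .ptd with tensor data aligned to 16 bytes.
  auto err = executorch::extension::flat_tensor::save_ptd(
      "checkpoint.ptd", tensor_map, 16);
  return err == executorch::runtime::Error::Ok ? 0 : 1;
}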

extension/flat_tensor/serialize/targets.bzl

Lines changed: 11 additions & 0 deletions
@@ -34,3 +34,14 @@ def define_common_targets():
        },
        exported_external_deps = ["flatbuffers-api"],
    )

    runtime.cxx_library(
        name = "serialize_cpp",
        srcs = ["serialize.cpp"],
        deps = [":generated_headers", "//executorch/runtime/core/exec_aten:lib"],
        exported_headers = ["serialize.h"],
        visibility = [
            "//executorch/...",
        ],
        exported_external_deps = ["flatbuffers-api"],
    )

extension/flat_tensor/test/TARGETS

Lines changed: 6 additions & 0 deletions
@@ -1,7 +1,13 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()

python_unittest(
    name = "serialize",
    srcs = [
extension/flat_tensor/test/targets.bzl

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.

    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    runtime.cxx_test(
        name = "serialize_cpp_test",
        srcs = [
            "test_serialize.cpp",
        ],
        deps = [
            "//executorch/extension/flat_tensor/serialize:serialize_cpp",
            "//executorch/extension/flat_tensor/serialize:generated_headers",
            "//executorch/extension/tensor:tensor",
        ],
    )
extension/flat_tensor/test/test_serialize.cpp

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/extension/flat_tensor/serialize/serialize.h>

#include <executorch/extension/flat_tensor/serialize/scalar_type_generated.h>
#include <executorch/extension/flat_tensor/serialize/schema_generated.h>

#include <gtest/gtest.h>

#include <executorch/extension/tensor/tensor_ptr.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>

#include <cstring>
#include <map>
#include <sstream>

using namespace ::testing;
using executorch::runtime::Error;
using executorch::runtime::Result;

class FlatTensorSerializeTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Since these tests cause ET_LOG to be called, the PAL must be
    // initialized first.
    executorch::runtime::runtime_init();
  }
};

TEST_F(FlatTensorSerializeTest, ValidFlatTensorSerialized) {
  std::map<std::string, exec_aten::Tensor> flat_tensor_map;

  float linear_weight = 3.14f;
  auto weight = executorch::extension::make_tensor_ptr({1}, &linear_weight);

  float linear_bias = 2.0f;
  auto bias = executorch::extension::make_tensor_ptr({1}, &linear_bias);

  flat_tensor_map.insert({"linear.weight", *weight.get()});
  flat_tensor_map.insert({"linear.bias", *bias.get()});

  std::ostringstream buf;
  auto err =
      executorch::extension::flat_tensor::save_ptd(buf, flat_tensor_map, 16);
  ASSERT_EQ(err, Error::Ok);
  auto x = buf.str();
  const char* byte_buffer = x.c_str();

  // Check the magic.
  ASSERT_EQ(byte_buffer[0], 'F');
  ASSERT_EQ(byte_buffer[1], 'H');
  ASSERT_EQ(byte_buffer[2], '0');
  ASSERT_EQ(byte_buffer[3], '1');

  // Check the header.
  ASSERT_EQ(
      *(uint32_t*)(byte_buffer + 4),
      executorch::extension::flat_tensor::internal::HEADER_EXPECTED_LENGTH);
  // The header is 40 bytes + 8 bytes of padding today, and then the
  // flatbuffer starts.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 8), 48);
  // This is fragile, and depends on the schema, the builder, and the padding
  // needed.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 16), 224);
  // Segment offset, depends on the padded header and flatbuffer sizes.
  const uint64_t segment_offset = 48 + 224;
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 24), segment_offset);
  // Segment total size: 8 bytes of data (2 floats), 24 bytes of padding.
  ASSERT_EQ(*(uint64_t*)(byte_buffer + 32), 32);

  // Check the flatbuffer.
  auto flat_tensor = ::flat_tensor::GetFlatTensor(byte_buffer + 48);

  ASSERT_EQ(
      flat_tensor->version(),
      executorch::extension::flat_tensor::internal::VERSION);
  ASSERT_EQ(flat_tensor->tensor_alignment(), 16);
  ASSERT_EQ(flat_tensor->tensors()->size(), 2);
  ASSERT_EQ(flat_tensor->segments()->size(), 1);

  // std::map is sorted by key, so "linear.bias" is serialized first.
  auto tensor0 = flat_tensor->tensors()->Get(0);
  ASSERT_EQ(strcmp(tensor0->fully_qualified_name()->c_str(), "linear.bias"), 0);
  ASSERT_EQ(tensor0->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
  ASSERT_EQ(tensor0->sizes()->size(), 1);
  ASSERT_EQ(tensor0->segment_index(), 0);
  ASSERT_EQ(tensor0->offset(), 0);

  auto tensor1 = flat_tensor->tensors()->Get(1);
  ASSERT_EQ(
      strcmp(tensor1->fully_qualified_name()->c_str(), "linear.weight"), 0);
  ASSERT_EQ(tensor1->scalar_type(), executorch_flatbuffer::ScalarType::FLOAT);
  ASSERT_EQ(tensor1->sizes()->size(), 1);
  ASSERT_EQ(tensor1->segment_index(), 0);
  ASSERT_EQ(tensor1->offset(), 16);

  // Test the segments.
  auto segment = flat_tensor->segments()->Get(0);

  ASSERT_EQ(segment->offset(), 0);
  ASSERT_EQ(segment->size(), 32);
  uint8_t* data = (uint8_t*)(byte_buffer + segment_offset);
  ASSERT_EQ(*(float*)(data + 0), linear_bias);
  ASSERT_EQ(*(float*)(data + 16), linear_weight);
}
