
Commit fc82d82

[devtool] introduce datasink class to etdump
Pull Request resolved: #8496

This diff introduces the DataSink class, which manages the customized debug data storage pipeline. Details can be found in https://docs.google.com/document/d/1y_m32mKdj-OgLcLUz9TKhBW3PC3bBDYSBbeAH544EfM/edit?tab=t.0

Differential Revision: [D69583422](https://our.internmc.facebook.com/intern/diff/D69583422/)

ghstack-source-id: 266455279
1 parent: bc497a0 · commit: fc82d82

8 files changed (+301, -14 lines)


devtools/etdump/data_sink.cpp

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#include <executorch/devtools/etdump/data_sink.h>
#include <executorch/devtools/etdump/utils.h>

namespace executorch {
namespace etdump {

size_t DataSink::write_tensor(const executorch::aten::Tensor& tensor) {
  if (tensor.nbytes() == 0) {
    return static_cast<size_t>(-1);
  }
  uint8_t* offset_ptr =
      internal::alignPointer(debug_buffer_.data() + offset_, 64);
  offset_ = (offset_ptr - debug_buffer_.data()) + tensor.nbytes();
  ET_CHECK_MSG(
      offset_ <= debug_buffer_.size(),
      "Ran out of space to store tensor data.");
  memcpy(offset_ptr, tensor.const_data_ptr(), tensor.nbytes());
  return (size_t)(offset_ptr - debug_buffer_.data());
}

size_t DataSink::get_storage_size() const {
  return debug_buffer_.size();
}

size_t DataSink::get_used_bytes() const {
  return offset_;
}

} // namespace etdump
} // namespace executorch

devtools/etdump/data_sink.h

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#pragma once

#include <executorch/devtools/etdump/data_sink_base.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/span.h>

namespace executorch {
namespace etdump {

class DataSink : public DataSinkBase {
 public:
  explicit DataSink(::executorch::runtime::Span<uint8_t> buffer)
      : debug_buffer_(buffer), offset_(0) {}

  size_t write_tensor(const executorch::aten::Tensor& tensor) override;
  size_t get_storage_size() const override;
  size_t get_used_bytes() const override;

 private:
  ::executorch::runtime::Span<uint8_t> debug_buffer_;
  // The offset of the next available location in the debug storage.
  size_t offset_;
};

} // namespace etdump
} // namespace executorch
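
A minimal usage sketch (illustrative only, not part of this commit): the buffer size, function name, and tensor values are arbitrary, and TensorFactory is the ExecuTorch testing helper also used by the unit test added below.

#include <executorch/devtools/etdump/data_sink.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/span.h>

void write_one_tensor_example() {
  // Caller-owned storage; DataSink itself never allocates.
  static uint8_t storage[1024];
  executorch::etdump::DataSink sink(
      ::executorch::runtime::Span<uint8_t>(storage, sizeof(storage)));

  torch::executor::testing::TensorFactory<executorch::aten::ScalarType::Float>
      tf;
  executorch::aten::Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0});

  // The tensor bytes are copied at a 64-byte-aligned address inside storage;
  // the return value is that location's offset within the buffer, or
  // (size_t)-1 for an empty tensor.
  size_t offset = sink.write_tensor(tensor);
  (void)offset;
}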

devtools/etdump/data_sink_base.h

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#pragma once

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <cstdlib>

namespace executorch {
namespace etdump {

/**
 * DataSinkBase is an abstract class that users can inherit and implement
 * to customize the storage and management of debug data in ETDumpGen. This
 * class provides a basic and essential interface for writing tensor data to
 * user-defined storage, retrieving the storage capacity, and tracking the
 * amount of data stored.
 */
class DataSinkBase {
 public:
  /**
   * Virtual destructor to ensure proper cleanup of derived classes.
   */
  virtual ~DataSinkBase() = default;

  /**
   * Write tensor data into the debug storage. This method should be
   * implemented by derived classes to handle the specifics of data storage.
   *
   * @param[in] tensor The tensor data to be written into the storage.
   * @return The offset of the starting location of the tensor data within the
   *         debug storage, which will be recorded in the corresponding tensor
   *         metadata of ETDump.
   */
  virtual size_t write_tensor(const executorch::aten::Tensor& tensor) = 0;

  /**
   * Get the maximum capacity of the debug storage in bytes.
   *
   * @return The total size of the debug storage.
   */
  virtual size_t get_storage_size() const = 0;

  /**
   * Get the number of bytes currently used in the debug storage.
   *
   * @return The amount of data currently stored in bytes.
   */
  virtual size_t get_used_bytes() const = 0;
};

} // namespace etdump
} // namespace executorch
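
For illustration only (not in this commit): a sketch of a custom sink built on this interface, assuming a heap-backed std::vector as storage; the class name VectorDataSink and the example namespace are hypothetical. The DataSink added above takes the opposite approach and writes into a fixed, caller-owned Span, so it performs no heap allocation.

#include <cstring>
#include <vector>

#include <executorch/devtools/etdump/data_sink_base.h>

namespace example {

// Grows on demand instead of writing into a fixed, pre-allocated buffer.
class VectorDataSink : public executorch::etdump::DataSinkBase {
 public:
  size_t write_tensor(const executorch::aten::Tensor& tensor) override {
    size_t offset = storage_.size();
    storage_.resize(offset + tensor.nbytes());
    std::memcpy(
        storage_.data() + offset, tensor.const_data_ptr(), tensor.nbytes());
    return offset;
  }

  size_t get_storage_size() const override {
    return storage_.capacity();
  }

  size_t get_used_bytes() const override {
    return storage_.size();
  }

 private:
  std::vector<uint8_t> storage_;
};

} // namespace example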

devtools/etdump/etdump_flatcc.cpp

Lines changed: 6 additions & 14 deletions
@@ -13,6 +13,7 @@
 #include <executorch/devtools/etdump/emitter.h>
 #include <executorch/devtools/etdump/etdump_schema_flatcc_builder.h>
 #include <executorch/devtools/etdump/etdump_schema_flatcc_reader.h>
+#include <executorch/devtools/etdump/utils.h>
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <executorch/runtime/platform/assert.h>
@@ -94,16 +95,6 @@ etdump_Tensor_ref_t add_tensor_entry(
   return etdump_Tensor_end(builder_);
 }
 
-static uint8_t* alignPointer(void* ptr, size_t alignment) {
-  intptr_t addr = reinterpret_cast<intptr_t>(ptr);
-  if ((addr & (alignment - 1)) == 0) {
-    // Already aligned.
-    return reinterpret_cast<uint8_t*>(ptr);
-  }
-  addr = (addr | (alignment - 1)) + 1;
-  return reinterpret_cast<uint8_t*>(addr);
-}
-
 } // namespace
 
 // Constructor implementation
@@ -113,9 +104,10 @@ ETDumpGen::ETDumpGen(Span<uint8_t> buffer) {
   // Initialize the flatcc builder_ using the buffer and buffer size.
 
   if (buffer.data() != nullptr) {
-    builder_ = (struct flatcc_builder*)alignPointer(buffer.data(), 64);
-    uintptr_t buffer_with_builder =
-        (uintptr_t)alignPointer(builder_ + sizeof(struct flatcc_builder), 64);
+    builder_ =
+        (struct flatcc_builder*)internal::alignPointer(buffer.data(), 64);
+    uintptr_t buffer_with_builder = (uintptr_t)internal::alignPointer(
+        builder_ + sizeof(struct flatcc_builder), 64);
     size_t builder_size =
         (size_t)(buffer_with_builder - (uintptr_t)buffer.data());
     size_t min_buf_size = max_alloc_buf_size + builder_size;
@@ -513,7 +505,7 @@ size_t ETDumpGen::copy_tensor_to_debug_buffer(executorch::aten::Tensor tensor) {
     return static_cast<size_t>(-1);
   }
   uint8_t* offset_ptr =
-      alignPointer(debug_buffer_.data() + debug_buffer_offset_, 64);
+      internal::alignPointer(debug_buffer_.data() + debug_buffer_offset_, 64);
   debug_buffer_offset_ = (offset_ptr - debug_buffer_.data()) + tensor.nbytes();
   ET_CHECK_MSG(
       debug_buffer_offset_ <= debug_buffer_.size(),

devtools/etdump/targets.bzl

Lines changed: 48 additions & 0 deletions
@@ -87,8 +87,54 @@ def define_common_targets():
         exported_external_deps = ["flatccrt"],
     )
 
+    runtime.cxx_library(
+        name = "utils",
+        srcs = [],
+        exported_headers = [
+            "utils.h",
+        ],
+        visibility = [
+
+        ],
+    )
+
     for aten_mode in (True, False):
         aten_suffix = "_aten" if aten_mode else ""
+
+        runtime.cxx_library(
+            name = "data_sink_base" + aten_suffix,
+            exported_headers = [
+                "data_sink_base.h",
+            ],
+            exported_deps = [
+                "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix,
+            ],
+            visibility = [
+                "//executorch/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+        )
+
+        runtime.cxx_library(
+            name = "data_sink" + aten_suffix,
+            headers = [
+                "data_sink.h",
+            ],
+            srcs = [
+                "data_sink.cpp",
+            ],
+            deps = [
+                ":utils",
+            ],
+            exported_deps = [
+                "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
+                ":data_sink_base" + aten_suffix,
+            ],
+            visibility = [
+                "//executorch/...",
+                "@EXECUTORCH_CLIENTS",
+            ],
+        )
         runtime.cxx_library(
             name = "etdump_flatcc" + aten_suffix,
             srcs = [
@@ -103,9 +149,11 @@ def define_common_targets():
             ],
             deps = [
                 "//executorch/runtime/platform:platform",
+                ":utils",
             ],
             exported_deps = [
                 ":etdump_schema_flatcc",
+                ":utils",
                 "//executorch/runtime/core:event_tracer" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/util:scalar_type_util" + aten_suffix,
             ],
devtools/etdump/tests/data_sink_test.cpp

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/devtools/etdump/data_sink.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/span.h>
#include <executorch/runtime/platform/runtime.h>
#include <executorch/test/utils/DeathTest.h>
#include <gtest/gtest.h>

using namespace ::testing;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using ::executorch::runtime::Span;
using torch::executor::testing::TensorFactory;

class DataSinkTest : public ::testing::Test {
 protected:
  void SetUp() override {
    torch::executor::runtime_init();
    // Allocate a small buffer for testing
    buffer_size_ = 128; // Small size for testing
    buffer_ptr_ = malloc(buffer_size_);
    buffer_ = Span<uint8_t>(static_cast<uint8_t*>(buffer_ptr_), buffer_size_);
    data_sink_ = std::make_unique<executorch::etdump::DataSink>(buffer_);
  }

  void TearDown() override {
    free(buffer_ptr_);
  }

  size_t buffer_size_;
  void* buffer_ptr_;
  Span<uint8_t> buffer_;
  std::unique_ptr<executorch::etdump::DataSink> data_sink_;
};

TEST_F(DataSinkTest, StorageSizeCheck) {
  EXPECT_EQ(data_sink_->get_storage_size(), buffer_size_);
}

TEST_F(DataSinkTest, WriteOneTensorAndCheckData) {
  TensorFactory<ScalarType::Float> tf;
  Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0});

  size_t offset = data_sink_->write_tensor(tensor);
  EXPECT_NE(offset, static_cast<size_t>(-1));

  // Check that the data in the buffer matches the tensor data
  const float* buffer_data =
      reinterpret_cast<const float*>(buffer_.data() + offset);
  for (size_t i = 0; i < tensor.numel(); ++i) {
    EXPECT_EQ(buffer_data[i], tensor.const_data_ptr<float>()[i]);
  }
}

TEST_F(DataSinkTest, WriteMultiTensorsAndCheckData) {
  TensorFactory<ScalarType::Float> tf;
  std::vector<Tensor> tensors = {
      tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0}),
      tf.make({1, 4}, {5.0, 6.0, 7.0, 8.0})};
  size_t offset = 0;
  for (const auto& tensor : tensors) {
    offset = data_sink_->write_tensor(tensor);
    EXPECT_NE(offset, static_cast<size_t>(-1));
    // Check that the data in the buffer matches the tensor data
    const float* buffer_data =
        reinterpret_cast<const float*>(buffer_.data() + offset);
    for (size_t i = 0; i < tensor.numel(); ++i) {
      EXPECT_EQ(buffer_data[i], tensor.const_data_ptr<float>()[i]);
    }
  }
}

TEST_F(DataSinkTest, PointerAlignmentCheck) {
  TensorFactory<ScalarType::Float> tf;
  Tensor tensor = tf.make({1, 4}, {1.0, 2.0, 3.0, 4.0});
  size_t offset = data_sink_->write_tensor(tensor);
  EXPECT_NE(offset, static_cast<size_t>(-1));
  // Check that the offset pointer is 64-byte aligned
  const uint8_t* offset_ptr = buffer_.data() + offset;
  EXPECT_EQ(reinterpret_cast<uintptr_t>(offset_ptr) % 64, 0);
}

TEST_F(DataSinkTest, WriteUntilOverflow) {
  TensorFactory<ScalarType::Float> tf;
  Tensor tensor = tf.zeros({1, 8}); // Large tensor to fill the buffer

  // Write tensors until we run out of space
  for (size_t i = 0; i < 2; i++) {
    data_sink_->write_tensor(tensor);
  }

  // Attempting to write another tensor should raise an error
  ET_EXPECT_DEATH(
      data_sink_->write_tensor(tensor),
      "Ran out of space to store tensor data.");
}

devtools/etdump/tests/targets.bzl

Lines changed: 11 additions & 0 deletions
@@ -19,3 +19,14 @@ def define_common_targets():
             "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
         ],
     )
+
+    runtime.cxx_test(
+        name = "data_sink_test",
+        srcs = [
+            "data_sink_test.cpp",
+        ],
+        deps = [
+            "//executorch/devtools/etdump:data_sink",
+            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
+        ],
+    )

devtools/etdump/utils.h

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#include <cstddef>
#include <cstdint>

#pragma once

namespace executorch {
namespace etdump {
namespace internal {

inline uint8_t* alignPointer(void* ptr, size_t alignment) {
  intptr_t addr = reinterpret_cast<intptr_t>(ptr);
  if ((addr & (alignment - 1)) == 0) {
    // Already aligned.
    return reinterpret_cast<uint8_t*>(ptr);
  }
  addr = (addr | (alignment - 1)) + 1;
  return reinterpret_cast<uint8_t*>(addr);
}

} // namespace internal
} // namespace etdump
} // namespace executorch
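
The rounding in alignPointer relies on the alignment being a power of two: ORing in alignment - 1 sets the low bits, and adding 1 carries the address up to the next multiple. A small illustrative check (the buffer name and sizes are made up for this example):

#include <cassert>
#include <cstdint>

#include <executorch/devtools/etdump/utils.h>

int main() {
  alignas(64) static uint8_t buf[256];
  using executorch::etdump::internal::alignPointer;

  // An already 64-byte-aligned pointer comes back unchanged.
  assert(alignPointer(buf, 64) == buf);

  // A pointer 3 bytes past the boundary is rounded up: (addr | 63) + 1
  // lands on the next 64-byte boundary, i.e. buf + 64.
  assert(alignPointer(buf + 3, 64) == buf + 64);
  return 0;
}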
