Add unit tests for convert_to_bfloat16

larryliu0820 · larryliu0820 · commit fb4940ee4ab2 · 2025-10-10T11:50:47.000-07:00
diff --git a/backends/aoti/common_shims.cpp b/backends/aoti/common_shims.cpp
@@ -50,14 +50,36 @@ AOTITorchError aoti_torch_get_storage_offset(
 }
 
 AOTITorchError aoti_torch_get_strides(Tensor* tensor, int64_t** ret_strides) {
-  std::vector<int64_t> strides(tensor->dim());
-  auto tensor_strides = tensor->strides();
-  for (ssize_t i = 0; i < tensor->dim(); i++) {
-    strides[i] = static_cast<int64_t>(tensor_strides[i]);
+  auto it = internal::tensor_to_strides.find(tensor);
+  bool needs_update = false;
+
+  if (it == internal::tensor_to_strides.end()) {
+    needs_update = true;
+  } else {
+    // Check if cached values are still valid
+    auto tensor_strides = tensor->strides();
+    if (it->second.size() != static_cast<size_t>(tensor->dim())) {
+      needs_update = true;
+    } else {
+      for (int i = 0; i < tensor->dim(); i++) {
+        if (it->second[i] != tensor_strides[i]) {
+          needs_update = true;
+          break;
+        }
+      }
+    }
+  }
+
+  if (needs_update) {
+    std::vector<int64_t> strides(tensor->dim());
+    auto tensor_strides = tensor->strides();
+    for (int i = 0; i < tensor->dim(); i++) {
+      strides[i] = tensor_strides[i];
+    }
+    it =
+        internal::tensor_to_strides.insert_or_assign(tensor, std::move(strides))
+            .first;
   }
-  auto it =
-      internal::tensor_to_strides.insert_or_assign(tensor, std::move(strides))
-          .first;
 
   // For 0D tensors, data() returns nullptr on empty vectors, but we need to
   // return a valid pointer
@@ -78,13 +100,35 @@ AOTITorchError aoti_torch_get_dtype(Tensor* tensor, int32_t* ret_dtype) {
 }
 
 AOTITorchError aoti_torch_get_sizes(Tensor* tensor, int64_t** ret_sizes) {
-  std::vector<int64_t> sizes(tensor->dim());
-  auto tensor_sizes = tensor->sizes();
-  for (ssize_t i = 0; i < tensor->dim(); i++) {
-    sizes[i] = static_cast<int64_t>(tensor_sizes[i]);
+  auto it = internal::tensor_to_sizes.find(tensor);
+  bool needs_update = false;
+
+  if (it == internal::tensor_to_sizes.end()) {
+    needs_update = true;
+  } else {
+    // Check if cached values are still valid
+    auto tensor_sizes = tensor->sizes();
+    if (it->second.size() != static_cast<size_t>(tensor->dim())) {
+      needs_update = true;
+    } else {
+      for (int i = 0; i < tensor->dim(); i++) {
+        if (it->second[i] != tensor_sizes[i]) {
+          needs_update = true;
+          break;
+        }
+      }
+    }
+  }
+
+  if (needs_update) {
+    std::vector<int64_t> sizes(tensor->dim());
+    auto tensor_sizes = tensor->sizes();
+    for (int i = 0; i < tensor->dim(); i++) {
+      sizes[i] = tensor_sizes[i];
+    }
+    it = internal::tensor_to_sizes.insert_or_assign(tensor, std::move(sizes))
+             .first;
   }
-  auto it = internal::tensor_to_sizes.insert_or_assign(tensor, std::move(sizes))
-                .first;
 
   // For 0D tensors, data() returns nullptr on empty vectors, but we need to
   // return a valid pointer
diff --git a/extension/llm/runner/test/CMakeLists.txt b/extension/llm/runner/test/CMakeLists.txt
@@ -19,7 +19,8 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake)
 
 set(_test_srcs
     test_generation_config.cpp test_text_llm_runner.cpp test_text_prefiller.cpp
-    test_text_decoder_runner.cpp test_multimodal_input.cpp test_wav_loader.cpp
+    test_text_decoder_runner.cpp test_multimodal_input.cpp test_util.cpp
+    test_wav_loader.cpp
 )
 
 # Add LSan stub for Apple platforms
diff --git a/extension/llm/runner/test/targets.bzl b/extension/llm/runner/test/targets.bzl
@@ -45,6 +45,16 @@ def define_common_targets():
         ],
     )
 
+    runtime.cxx_test(
+        name = "test_util",
+        srcs = ["test_util.cpp"],
+        deps = [
+            "//executorch/extension/llm/runner:stats",
+            "//executorch/extension/tensor:tensor",
+            "//executorch/runtime/core:core",
+        ],
+    )
+
     runtime.cxx_test(
         name = "test_wav_loader",
         srcs = ["test_wav_loader.cpp"],
diff --git a/extension/llm/runner/test/test_util.cpp b/extension/llm/runner/test/test_util.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/llm/runner/util.h>
+#include <executorch/extension/tensor/tensor_ptr_maker.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace {
+
+using ::executorch::aten::ScalarType;
+using ::executorch::extension::make_tensor_ptr;
+using ::executorch::extension::llm::convert_to_bfloat16;
+
+TEST(ConvertToBFloat16Test, ConvertsFloatTensorData) {
+  auto source_tensor = make_tensor_ptr<float>(
+      {2, 2}, std::vector<float>{0.0f, 1.5f, -2.0f, 3.25f});
+
+  auto result = convert_to_bfloat16(source_tensor);
+  ASSERT_TRUE(result.ok());
+  auto bf16_tensor = *result;
+
+  EXPECT_EQ(bf16_tensor->scalar_type(), ScalarType::BFloat16);
+  EXPECT_EQ(bf16_tensor->numel(), source_tensor->numel());
+
+  auto src_sizes = source_tensor->sizes();
+  auto dst_sizes = bf16_tensor->sizes();
+  ASSERT_EQ(dst_sizes.size(), src_sizes.size());
+  for (size_t dim = 0; dim < dst_sizes.size(); ++dim) {
+    EXPECT_EQ(dst_sizes[dim], src_sizes[dim]);
+  }
+
+  const auto* converted_data = bf16_tensor->const_data_ptr<::c10::BFloat16>();
+  const auto* original_data = source_tensor->const_data_ptr<float>();
+  ASSERT_NE(converted_data, nullptr);
+  ASSERT_NE(original_data, nullptr);
+
+  for (size_t i = 0; i < static_cast<size_t>(source_tensor->numel()); ++i) {
+    EXPECT_NEAR(static_cast<float>(converted_data[i]), original_data[i], 1e-2f);
+  }
+}
+
+TEST(ConvertToBFloat16Test, RejectsNonFloatTensor) {
+  auto non_float_tensor =
+      make_tensor_ptr<int64_t>({3}, std::vector<int64_t>{1, 2, 3});
+
+  auto result = convert_to_bfloat16(non_float_tensor);
+  EXPECT_FALSE(result.ok());
+  EXPECT_EQ(result.error(), ::executorch::runtime::Error::InvalidArgument);
+}
+
+} // namespace
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
@@ -152,30 +152,17 @@ convert_to_bfloat16(const ::executorch::extension::TensorPtr& src_tensor) {
       InvalidArgument,
       "BFloat16 conversion only supported from Float source data");
 
-  size_t num_elements = src_tensor->numel();
-  auto sizes = src_tensor->sizes();
-
-  // Allocate memory for bfloat16 data
-  auto* bf16_data = new uint16_t[num_elements];
+  const auto num_elements = static_cast<size_t>(src_tensor->numel());
   const float* float_data = src_tensor->const_data_ptr<float>();
 
-  // Convert float to bfloat16
+  auto bf16_tensor = ::executorch::extension::empty_like(
+      src_tensor, ::executorch::aten::ScalarType::BFloat16);
+  auto* bf16_data = bf16_tensor->mutable_data_ptr<::c10::BFloat16>();
   for (size_t i = 0; i < num_elements; ++i) {
-    // bfloat16 is the upper 16 bits of float32
-    uint32_t float_bits;
-    std::memcpy(&float_bits, &float_data[i], sizeof(float));
-
-    // Rounding: add 0x7FFF to round to nearest even
-    uint32_t rounding_bias = 0x7FFF + ((float_bits >> 16) & 1);
-    bf16_data[i] = static_cast<uint16_t>((float_bits + rounding_bias) >> 16);
+    bf16_data[i] = ::c10::BFloat16(float_data[i]);
   }
 
-  // Create tensor with deleter to free allocated memory
-  return ::executorch::extension::from_blob(
-      bf16_data,
-      {sizes.begin(), sizes.end()},
-      ::executorch::aten::ScalarType::BFloat16,
-      [](void* ptr) { delete[] static_cast<uint16_t*>(ptr); });
+  return bf16_tensor;
 }
 
 } // namespace llm

Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,8 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake)`
`19`	`19`
`20`	`20`	`set(_test_srcs`
`21`	`21`	`test_generation_config.cpp test_text_llm_runner.cpp test_text_prefiller.cpp`
`22`		`- test_text_decoder_runner.cpp test_multimodal_input.cpp test_wav_loader.cpp`
	`22`	`+ test_text_decoder_runner.cpp test_multimodal_input.cpp test_util.cpp`
	`23`	`+ test_wav_loader.cpp`
`23`	`24`	`)`
`24`	`25`
`25`	`26`	`# Add LSan stub for Apple platforms`