Add portable randn kernel implementation

GregoryComer · GregoryComer · commit 478494b57baa · 2025-05-23T22:05:11.000-07:00
diff --git a/kernels/portable/cpu/op_randn.cpp b/kernels/portable/cpu/op_randn.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <c10/util/irange.h>
+
+#include <executorch/kernels/portable/cpu/scalar_utils.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#include <random>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+using executorch::aten::IntArrayRef;
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+
+// Writes `numel` samples of `dist` (driven by `gen`) to `data`, cast to CTYPE.
+template <class CTYPE>
+void impl(CTYPE* data, int64_t numel, std::mt19937& gen, std::normal_distribution<double>& dist) {
+  for (const auto idx : c10::irange(numel)) {
+    data[idx] = static_cast<CTYPE>(dist(gen));
+  }
+}
+
+/**
+ * randn.out: resizes `out` to `sizes` and fills it with samples drawn from a
+ * normal distribution with mean 0 and standard deviation 1. The generator is
+ * seeded from std::random_device on each call; there is no caller-set seed.
+ */
+Tensor& randn_out(
+    KernelRuntimeContext& ctx,
+    const IntArrayRef sizes,
+    Tensor& out) {
+  // Resize for dynamic shape
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_tensor(out, sizes) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor.");
+
+  // Set up the generator only after the resize check has passed.
+  std::mt19937 gen((std::random_device())());
+  std::normal_distribution<double> dist(0.0, 1.0);
+
+  // Samples are drawn as double and cast to the output dtype inside impl().
+  ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, "randn.out", CTYPE, [&] {
+    impl(out.mutable_data_ptr<CTYPE>(), out.numel(), gen, dist);
+  });
+
+  return out;
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
+
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
@@ -713,6 +713,12 @@
     - arg_meta: null
       kernel_name: torch::executor::prod_out
 
+- op: randn.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::randn_out
+  tags: nondeterministic_seeded
+
 - op: reciprocal.out
   kernels:
     - arg_meta: null
diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt
@@ -197,6 +197,7 @@ set(all_test_sources
     "op_permute_copy_test.cpp"
     "op_pixel_shuffle_test.cpp"
     "op_prod_test.cpp"
+    "op_randn_test.cpp"
     "op_reciprocal_test.cpp"
     "op_relu_test.cpp"
     "op_remainder_test.cpp"
diff --git a/kernels/test/op_randn_test.cpp b/kernels/test/op_randn_test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <c10/util/irange.h>
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/kernels/test/supported_features.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <numeric>
+
+using executorch::aten::IntArrayRef;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using torch::executor::testing::TensorFactory;
+
+// Fixture for randn.out: runs the op and checks coarse sample statistics.
+class OpRandnTest : public OperatorTest {
+ protected:
+  void op_randn_out(const IntArrayRef sizes, Tensor& out) {
+    torch::executor::aten::randn_outf(context_, sizes, out);
+  }
+
+  // Runs randn.out into a zero-filled DTYPE tensor of shape `sizes`, then
+  // expects the sample mean to be close to 0 and the sample stdev close to 1.
+  template <typename CTYPE, ScalarType DTYPE>
+  void test_randn(const std::vector<int64_t>& sizes) {
+    TensorFactory<DTYPE> tf;
+    // Tensor factory wants int32 sizes, op kernel wants int64.
+    std::vector<int32_t> sizes_i32;
+    for (const int64_t s : sizes) {
+      sizes_i32.push_back(static_cast<int32_t>(s));
+    }
+    Tensor out = tf.zeros(sizes_i32);
+    op_randn_out(IntArrayRef(sizes.data(), sizes.size()), out);
+
+    // Two-pass population mean and standard deviation, accumulated in double.
+    const auto* first = out.const_data_ptr<CTYPE>();
+    const auto* last = first + out.numel();
+    const double mean = std::accumulate(first, last, 0.0,
+        [](double acc, CTYPE v) { return acc + static_cast<double>(v); }) / out.numel();
+    const double var = std::accumulate(first, last, 0.0, [mean](double acc, CTYPE v) {
+      const double d = static_cast<double>(v) - mean;
+      return acc + d * d;
+    }) / out.numel();
+    const double stdev = std::sqrt(var);
+
+    // Deliberately loose bounds (mean within 5/sqrt(N)) to avoid flaky CI.
+    EXPECT_LE(std::abs(mean), 5.0 / std::sqrt(out.numel()));
+    EXPECT_LE(std::abs(stdev - 1.0), 0.1);
+    EXPECT_GT(stdev, 0);
+  }
+};
+
+// Runs test_randn once for every dtype listed by ET_FORALL_FLOATHBF16_TYPES.
+TEST_F(OpRandnTest, SmokeTest) {
+  std::vector<int64_t> shape = {2, 3, 4, 128};
+#define TEST_ENTRY(ctype, dtype) \
+  test_randn<ctype, ScalarType::dtype>(shape);
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+// Float outputs of rank 2 through 5: {1024, 1}, {1024, 1, 2}, ... one dim per step.
+TEST_F(OpRandnTest, Rank) {
+  std::vector<int64_t> shape = {1024};
+  for (int64_t extra_dim = 1; extra_dim <= 4; ++extra_dim) {
+    shape.push_back(extra_dim);
+    test_randn<float, ScalarType::Float>(shape);
+  }
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
@@ -285,6 +285,7 @@ def define_common_targets():
     _common_op_test("op_pixel_unshuffle_test", ["aten", "portable"])
     _common_op_test("op_pow_test", ["aten", "portable"])
     _common_op_test("op_prod_test", ["aten", "portable"])
+    _common_op_test("op_randn_test", ["aten", "portable"])
     _common_op_test("op_reciprocal_test", ["aten", "portable"])
     _common_op_test("op_relu_test", ["aten", "portable"])
     _common_op_test("op_remainder_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -973,6 +973,14 @@ ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
+    op_target(
+        name = "op_randn",
+        deps = [
+            ":scalar_utils",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+    ),
     op_target(
         name = "op_reciprocal",
         deps = [