From 1cba101ea8bffe0054b0433aad96adc7dad8af93 Mon Sep 17 00:00:00 2001
From: morelos
Date: Fri, 13 Jun 2025 15:49:32 -0700
Subject: [PATCH] [ET-VK][Ops] choose_qparams ops skeleton test framework

Pull Request resolved: https://github.com/pytorch/executorch/pull/11554

# Context

In this diff we create the skeleton test framework for choose_qparams. This is necessary because we need a reference against which to test our Vulkan implementation of the choose_qparams operators: an existing CPU implementation. The test framework is heavily inspired by [sdpa_test.cpp](https://github.com/pytorch/executorch/blob/main/backends/vulkan/test/op_tests/sdpa_test.cpp). We make use of the [op_choose_qparams.cpp](https://github.com/pytorch/executorch/blob/main/kernels/quantized/cpu/op_choose_qparams.cpp) CPU implementation of the `choose_qparams_tensor` and `choose_qparams_per_token_asymmetric` operators. An explanation of the operators will be included in a later diff in this stack, where the actual Vulkan implementation is created.

# Changes

The main change in this diff is the new test framework `choose_qparams_test.cpp`, which is also registered in targets.bzl so that the test can be invoked properly. Since it is modeled on sdpa_test.cpp, it follows a similar layout. First come forward declarations of the functions we test against (`choose_qparams_tensor_out` and `choose_qparams_per_token_asymmetric_out`). Next come wrappers for those functions that drop the runtime context, and finally wrappers exposing ATen versions of the same operators via the `WRAP_TO_ATEN` macro. No context is needed because this code exists purely for testing. There is also a utility function that sanity-checks the quantization arguments which will later be passed to the Vulkan implementation. One notable detail: because the wrapper macro does not support returning a tuple of two values, the no-context wrappers return only the scale tensor. This does not affect evaluation, since the zero points are still written through the out argument. We also pass a dummy `eps` value, since it is not used by the CPU implementation at all.
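For concreteness, here is a minimal sketch (not part of the patch) of how a test case might exercise the ATen reference wrapper once actual test cases are added. The test name, input shape, and the re-declaration of `choose_qparams_tensor_aten` are illustrative assumptions; the output dtypes and scalar shape follow from how the wrapper allocates its out tensors.

```cpp
#include <ATen/ATen.h>
#include <gtest/gtest.h>

#include <tuple>

namespace torch::executor::native {
// Declaration of the ATen reference wrapper defined in choose_qparams_test.cpp
// (repeated here only to keep the sketch self-contained).
std::tuple<at::Tensor, at::Tensor> choose_qparams_tensor_aten(
    const at::Tensor& input,
    int64_t quant_min,
    int64_t quant_max,
    at::ScalarType dtype);
} // namespace torch::executor::native

// Hypothetical test: the per-tensor reference yields a single
// (scale, zero_point) pair with the dtypes set up by the wrapper.
TEST(ChooseQParamsReference, TensorVariantProducesScalarParams) {
  at::Tensor input =
      at::rand({2, 3, 4}, at::device(at::kCPU).dtype(at::kFloat));

  auto [scale, zero_point] =
      torch::executor::native::choose_qparams_tensor_aten(
          input, /*quant_min=*/-128, /*quant_max=*/127, at::kChar);

  EXPECT_EQ(scale.numel(), 1);
  EXPECT_EQ(zero_point.numel(), 1);
  EXPECT_EQ(scale.scalar_type(), at::kDouble);
  EXPECT_EQ(zero_point.scalar_type(), at::kLong);
}
```

A Vulkan-vs-reference comparison would follow the same pattern, with the Vulkan results compared against the tensors returned by the reference wrapper.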
ghstack-source-id: 290376490
@exported-using-ghexport

Differential Revision: [D76436870](https://our.internmc.facebook.com/intern/diff/D76436870/)
---
 .../test/op_tests/choose_qparams_test.cpp | 116 ++++++++++++++++++
 backends/vulkan/test/op_tests/targets.bzl |   9 ++
 2 files changed, 125 insertions(+)
 create mode 100644 backends/vulkan/test/op_tests/choose_qparams_test.cpp

diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
new file mode 100644
index 00000000000..ec839cdf6bf
--- /dev/null
+++ b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include "test_utils.h"
+
+#include
+#include
+
+namespace torch {
+namespace executor {
+namespace native {
+
+// Forward declarations of the functions we're testing
+std::tuple<Tensor&, Tensor&> choose_qparams_tensor_out(
+    const Tensor& input,
+    int64_t quant_min,
+    int64_t quant_max,
+    ET_UNUSED double eps,
+    ScalarType dtype,
+    Tensor& scale_out,
+    Tensor& zero_point_out);
+
+std::tuple<Tensor&, Tensor&> choose_qparams_per_token_asymmetric_out(
+    const Tensor& input,
+    ScalarType dtype,
+    Tensor& scale_out,
+    Tensor& zero_point_out);
+
+// Wrapper function for choose_qparams_tensor_out without context
+Tensor& choose_qparams_tensor_out_no_context(
+    const Tensor& input,
+    int64_t quant_min,
+    int64_t quant_max,
+    ET_UNUSED double eps,
+    ScalarType dtype,
+    Tensor& scale_out,
+    Tensor& zero_point_out) {
+  torch::executor::native::choose_qparams_tensor_out(
+      input, quant_min, quant_max, eps, dtype, scale_out, zero_point_out);
+  return scale_out;
+}
+
+// Wrapper function for choose_qparams_per_token_asymmetric_out without context
+Tensor& choose_qparams_per_token_asymmetric_out_no_context(
+    const Tensor& input,
+    ScalarType dtype,
+    Tensor& scale_out,
+    Tensor& zero_point_out) {
+  torch::executor::native::choose_qparams_per_token_asymmetric_out(
+      input, dtype, scale_out, zero_point_out);
+  return scale_out;
+}
+
+// ATen wrapper for choose_qparams_tensor
+std::tuple<at::Tensor, at::Tensor> choose_qparams_tensor_aten(
+    const at::Tensor& input,
+    int64_t quant_min,
+    int64_t quant_max,
+    at::ScalarType dtype) {
+  auto scale_out = at::empty({}, at::device(at::kCPU).dtype(at::kDouble));
+  auto zero_point_out = at::empty({}, at::device(at::kCPU).dtype(at::kLong));
+  double eps = 1e-7;
+
+  ScalarType et_dtype = at_scalartype_to_et_scalartype(dtype);
+
+  // Use WRAP_TO_ATEN with the wrapper function
+  WRAP_TO_ATEN(choose_qparams_tensor_out_no_context, 5)
+  (input, quant_min, quant_max, eps, et_dtype, scale_out, zero_point_out);
+
+  return {scale_out, zero_point_out};
+}
+
+// ATen wrapper for choose_qparams_per_token_asymmetric
+std::tuple<at::Tensor, at::Tensor> choose_qparams_per_token_asymmetric_aten(
+    const at::Tensor& input,
+    at::ScalarType dtype) {
+  // Calculate output sizes for scale and zero_point tensors
+  std::vector<int64_t> output_sizes;
+  for (int64_t i = 0; i < input.dim() - 1; i++) {
+    output_sizes.push_back(input.size(i));
+  }
+  output_sizes.push_back(1);
+
+  auto scale_out =
+      at::empty(output_sizes, at::device(at::kCPU).dtype(at::kDouble));
+  auto zero_point_out =
+      at::empty(output_sizes, at::device(at::kCPU).dtype(at::kLong));
+
+  ScalarType et_dtype = at_scalartype_to_et_scalartype(dtype);
+
+  // Use WRAP_TO_ATEN with the wrapper function
+  WRAP_TO_ATEN(choose_qparams_per_token_asymmetric_out_no_context, 2)
+  (input, et_dtype, scale_out, zero_point_out);
+
+  return {scale_out, zero_point_out};
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
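Since the operator explanation is deferred to a later diff in this stack, a brief orientation for readers of the reference: choose_qparams-style operators select a scale and zero point that map a floating-point value range onto the integer range [quant_min, quant_max]. The helper below is a minimal sketch of the common asymmetric formulation, assuming the textbook scale/zero-point derivation; it is not taken from op_choose_qparams.cpp, whose exact edge-case handling (eps, degenerate ranges, rounding) may differ.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>

// Illustrative helper (not the ExecuTorch kernel): choose an asymmetric
// (scale, zero_point) pair that maps [min_val, max_val] onto
// [quant_min, quant_max] while keeping 0.0f exactly representable.
std::pair<double, int64_t> choose_qparams_asymmetric_sketch(
    float min_val,
    float max_val,
    int64_t quant_min,
    int64_t quant_max) {
  // Extend the range to include zero so that 0.0f quantizes exactly.
  min_val = std::min(min_val, 0.0f);
  max_val = std::max(max_val, 0.0f);

  // Scale stretches the float range over the integer range.
  double scale =
      (static_cast<double>(max_val) - static_cast<double>(min_val)) /
      static_cast<double>(quant_max - quant_min);
  if (scale == 0.0) {
    scale = 0.1;  // arbitrary fallback for a constant-zero input
  }

  // Zero point is the integer code that represents 0.0f, clamped and rounded.
  double initial_zero_point = quant_min - min_val / scale;
  initial_zero_point = std::max(
      static_cast<double>(quant_min),
      std::min(static_cast<double>(quant_max), initial_zero_point));
  int64_t zero_point =
      static_cast<int64_t>(std::nearbyint(initial_zero_point));

  return {scale, zero_point};
}
```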
"linear_weight_int4_test", extra_deps = [