Commit 6f01e12

Update on "[ET-VK] Migrate ops to use DynamicDispatchNode"
## Changes

* Migrate the operators used in the llama model to use `DynamicDispatchNode` instead of `DispatchNode`.

## Motivation

`DynamicDispatchNode` is a subclass of `DispatchNode` that allows the compute shader, global work group size, and local work group size to be selected dynamically each time the command buffer is encoded. This is critical for achieving optimal performance when input shapes are dynamic, since it lets operators pick the best compute shader for the current input conditions and adjust the global work group size so that only the minimum number of work groups is launched.

Without this change, performance of llama 3.2 1B with dynamic shapes enabled is terrible (< 1 tok/s) because global work group sizes are determined from maximum tensor sizes, which are derived from the maximum sequence length. In practice, the sequence length dimension of tensors (even during the prefill phase) will not approach the maximum, so many inactive threads are launched during compute shader dispatches.

Differential Revision: [D75878398](https://our.internmc.facebook.com/intern/diff/D75878398/)

[ghstack-poisoned]
2 parents: fd00dac + a155072 · commit 6f01e12
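To make the motivation concrete, here is a minimal, self-contained sketch (not part of this commit; the local size, sequence lengths, and helper function are hypothetical) contrasting a dispatch sized once from the maximum sequence length with one sized at encode time from the actual extent, which is what the `DynamicDispatchNode` migration enables.

```cpp
// Illustrative only: compares how many work groups are launched when the
// global work group size is derived from the maximum sequence length
// (static DispatchNode-style sizing) versus the actual sequence length
// observed at encode time (DynamicDispatchNode-style sizing).
#include <cstdint>
#include <iostream>

// Hypothetical helper: work groups needed to cover `extent` elements with
// `local_size` threads per group, rounded up.
uint32_t num_workgroups(uint32_t extent, uint32_t local_size) {
  return (extent + local_size - 1) / local_size;
}

int main() {
  const uint32_t local_size = 64;      // threads per work group along one axis
  const uint32_t max_seq_len = 2048;   // sizing input for the static approach
  const uint32_t actual_seq_len = 17;  // typical extent during decode

  // Sized from maximum tensor sizes: most launched threads are inactive.
  std::cout << "static:  " << num_workgroups(max_seq_len, local_size) << " work groups\n";
  // Re-sized at encode time from the actual extent: minimal launch.
  std::cout << "dynamic: " << num_workgroups(actual_seq_len, local_size) << " work groups\n";
  return 0;
}
```

With these hypothetical numbers, static sizing launches 32 work groups per dispatch while encode-time sizing launches 1; that reduction along the sequence-length dimension is the effect the migration to `DynamicDispatchNode` is after.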

33 files changed: +453 −200 lines

.ci/scripts/utils.sh

Lines changed: 1 addition & 2 deletions
@@ -158,8 +158,7 @@ build_executorch_runner() {
 cmake_install_executorch_lib() {
   echo "Installing libexecutorch.a and libportable_kernels.a"
   clean_executorch_install_folders
-  retry cmake -DBUCK2="$BUCK" \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
+  retry cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#version 450 core

#define PRECISION ${PRECISION}

#define VEC4_T ${texel_load_type(DTYPE, STORAGE)}
#define T ${buffer_scalar_type(DTYPE)}

${define_active_storage_type(STORAGE)}

#include "indexing_utils.h"

${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
$if STORAGE == "buffer":
  ${layout_declare_ubo(2, "int", "numel")}
$else:
  ${layout_declare_ubo(2, "ivec3", "out_limits")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

#include "activations.h"

#ifdef USING_BUFFER

void main() {
  const int i = int(gl_GlobalInvocationID.x);
  if (i >= numel) {
    return;
  }

  float in_val = float(t_in[i]);
  t_out[i] = T(tan(in_val));
}

#else

void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  if (any(greaterThanEqual(pos, out_limits))) {
    return;
  }

  VEC4_T in_texel = texelFetch(t_in, pos, 0);
  imageStore(t_out, pos, VEC4_T(tan(in_texel)));
}

#endif
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
tan:
  parameter_names_with_default_values:
    DTYPE: float
    STORAGE: texture3d
  generate_variant_forall:
    DTYPE:
      - VALUE: half
      - VALUE: float
    STORAGE:
      - VALUE: texture3d
      - VALUE: buffer
  shader_variants:
    - NAME: tan

backends/vulkan/runtime/graph/ops/impl/Clone.cpp

Lines changed: 12 additions & 1 deletion
@@ -61,6 +61,17 @@ void add_clone_node(
       resize_clone_node));
 }
 
+utils::uvec3 clone_image_to_buffer_global_wg_size(
+    ComputeGraph* graph,
+    const vkapi::ShaderInfo& shader,
+    const std::vector<ArgGroup>& args,
+    const std::vector<ValueRef>& resize_args) {
+  (void)shader;
+  (void)resize_args;
+  const ValueRef image = args.at(1).refs.at(0);
+  return graph->create_global_wg_size(image);
+}
+
 void add_image_to_buffer_node(
     ComputeGraph& graph,
     const ValueRef image,
@@ -72,7 +83,7 @@ void add_image_to_buffer_node(
   graph.execute_nodes().emplace_back(new DynamicDispatchNode(
       graph,
       shader,
-      default_pick_global_wg_size,
+      clone_image_to_buffer_global_wg_size,
       default_pick_local_wg_size,
       // Input and Outputs
       {{buffer, vkapi::kWrite}, {image, vkapi::kRead}},

backends/vulkan/runtime/graph/ops/impl/Common.cpp

Lines changed: 5 additions & 2 deletions
@@ -14,8 +14,9 @@ utils::uvec3 default_pick_global_wg_size(
     ComputeGraph* graph,
     const vkapi::ShaderInfo& shader,
     const std::vector<ArgGroup>& args,
-    const std::vector<ValueRef>& additional_args) {
+    const std::vector<ValueRef>& resize_args) {
   (void)shader;
+  (void)resize_args;
   const ValueRef out = args.at(0).refs.at(0);
   return graph->create_global_wg_size(out);
 }
@@ -25,8 +26,10 @@ utils::uvec3 default_pick_local_wg_size(
     const vkapi::ShaderInfo& shader,
     const utils::uvec3& global_workgroup_size,
     const std::vector<ArgGroup>& args,
-    const std::vector<ValueRef>& additional_args) {
+    const std::vector<ValueRef>& resize_args) {
   (void)shader;
+  (void)args;
+  (void)resize_args;
   return graph->create_local_wg_size(global_workgroup_size);
 }
 

backends/vulkan/runtime/graph/ops/impl/Common.h

Lines changed: 2 additions & 10 deletions
@@ -17,31 +17,23 @@ namespace vkcompute {
  * Creates a global workgroup size based on the first output tensor in the args.
  * This is a utility function that extracts the output tensor from
  * args.at(0).refs.at(0) and calls graph->create_global_wg_size(out) on it.
- *
- * @param graph The ComputeGraph instance
- * @param args Vector of ArgGroup containing the output tensor reference
- * @return utils::uvec3 The global workgroup size
  */
 utils::uvec3 default_pick_global_wg_size(
     ComputeGraph* graph,
     const vkapi::ShaderInfo& shader,
     const std::vector<ArgGroup>& args,
-    const std::vector<ValueRef>& additional_args);
+    const std::vector<ValueRef>& resize_args);
 
 /**
  * Creates a local workgroup size based on the first output tensor in the args.
  * This is a utility function that extracts the output tensor from
  * args.at(0).refs.at(0) and calls graph->create_local_wg_size(out) on it.
- *
- * @param graph The ComputeGraph instance
- * @param args Vector of ArgGroup containing the output tensor reference
- * @return utils::uvec3 The local workgroup size
  */
 utils::uvec3 default_pick_local_wg_size(
     ComputeGraph* graph,
     const vkapi::ShaderInfo& shader,
     const utils::uvec3& global_workgroup_size,
     const std::vector<ArgGroup>& args,
-    const std::vector<ValueRef>& additional_args);
+    const std::vector<ValueRef>& resize_args);
 
 } // namespace vkcompute

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

using namespace utils;

void resize_tan_node(
    ComputeGraph* graph,
    const std::vector<ArgGroup>& args,
    const std::vector<ValueRef>& extra_args) {
  (void)extra_args;
  vTensorPtr out = graph->get_tensor(args[0].refs[0]);
  vTensorPtr self = graph->get_tensor(args[1].refs[0]);

  out->virtual_resize(self->sizes());
}

void add_tan_node(ComputeGraph& graph, const ValueRef in, const ValueRef out) {
  std::string kernel_name = "tan";
  add_dtype_suffix(kernel_name, graph.dtype_of(out));
  add_storage_type_suffix(kernel_name, graph.storage_type_of(out));

  vkapi::ParamsBindList ubos({});
  ubos.append({graph.logical_limits_ubo(out)});

  graph.execute_nodes().emplace_back(new DispatchNode(
      graph,
      VK_KERNEL_FROM_STR(kernel_name),
      graph.create_global_wg_size(out),
      graph.create_local_wg_size(out),
      // Inputs and Outputs
      {{out, vkapi::kWrite}, {in, vkapi::kRead}},
      // Shader params buffers
      ubos,
      // Push Constants
      {},
      // Specialization Constants
      {},
      // Resize Args
      {},
      // Resizing Logic
      resize_tan_node));
}

void tan(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  return add_tan_node(graph, args[0], args[1]);
}

REGISTER_OPERATORS {
  VK_REGISTER_OP(aten.tan.default, tan);
}

} // namespace vkcompute

backends/vulkan/test/op_tests/cases.py

Lines changed: 16 additions & 0 deletions
@@ -1171,6 +1171,22 @@ def get_unary_ops_inputs():
     return test_suite
 
 
+# separate test suite from unary_ops for learning purposes
+@register_test_suite("aten.tan.default")
+def get_tan_inputs():
+    test_suite = VkTestSuite(
+        [
+            (M1,),
+            (M1, M2),
+            (S1, M1, M2),
+            (S1, S2, S2, M2),
+        ]
+    )
+    test_suite.storage_types = ["utils::kTexture3D", "utils::kBuffer"]
+    test_suite.dtypes = ["at::kFloat", "at::kHalf"]
+    return test_suite
+
+
 @register_test_suite("aten._native_batch_norm_legit_no_training.default")
 def get_native_batch_norm_inputs():
     Test = namedtuple(

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 3 additions & 6 deletions
@@ -547,8 +547,8 @@ vkcompute::ComputeGraph build_mm_graph(
     vkcompute::vkapi::ScalarType dtype,
     vkcompute::utils::StorageType in_out_stype,
     vkcompute::utils::GPUMemoryLayout memory_layout,
-    const bool prepack_mat2,
-    const float mat2_val) {
+    const std::vector<float>& mat2_data,
+    const bool prepack_mat2) {
   using namespace vkcompute;
   GraphConfig config;
   ComputeGraph graph(config);
@@ -569,10 +569,7 @@ vkcompute::ComputeGraph build_mm_graph(
       graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
   IOValueRef mat2{};
 
-  CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
-  if (mat2_val != 0.0f) {
-    std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
-  }
+  ValueRef mat2_w = graph.add_tensorref(mat2_size, dtype, mat2_data.data());
 
   if (prepack_mat2) {
     mat2.value = mat2_w;

backends/vulkan/test/utils/test_utils.h

Lines changed: 2 additions & 2 deletions
@@ -265,8 +265,8 @@ vkcompute::ComputeGraph build_mm_graph(
     vkcompute::vkapi::ScalarType dtype,
     vkcompute::utils::StorageType in_out_stype,
     vkcompute::utils::GPUMemoryLayout memory_layout,
-    const bool prepack_mat2 = false,
-    const float mat2_val = 0.0f);
+    const std::vector<float>& mat2_data,
+    const bool prepack_mat2 = false);
 
 //
 // Debugging Utilities
