pytorch · facebook-github-bot · Sep 8, 2025 · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -928,7 +928,9 @@ jobs:
         CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
         .ci/scripts/setup-linux.sh --build-tool "cmake"
 
+        # Custom operator tests
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
+        ./cmake-out/backends/vulkan/test/custom_ops/quantized_linear
 
   nxp-build-test:
     name: nxp-build-test

diff --git a/backends/vulkan/runtime/graph/ops/glsl/common.glslh b/backends/vulkan/runtime/graph/ops/glsl/common.glslh
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef COMMON_GLSLH
+#define COMMON_GLSLH
+
+// Integer helpers for groups of 4. Each argument is parenthesized so that
+// expression arguments such as `a | b` or `c ? a : b` expand as intended.
+#define align_up_4(x) (((x) + 3) & -4)
+#define div_up_4(x) (((x) + 3) >> 2)
+
+#define mul_4(x) ((x) << 2)
+#define div_4(x) ((x) >> 2)
+#define mod_4(x) ((x) & 3)
+
+struct TensorIndex4D {
+  ivec4 data; // four signed ints; NOTE(review): axis order not shown here
+};
+
+#ifdef DEBUG_MODE
+
+#extension GL_EXT_debug_printf : require
+
+// Debug-only helper (compiled under DEBUG_MODE): prints the x, y, z and w
+// components of index.data on one "tensor_idx:" line via debugPrintfEXT.
+void printTensorIndex4D(const TensorIndex4D index) {
+  ivec4 idx = index.data;
+  debugPrintfEXT(
+      "tensor_idx: %d, %d, %d, %d\\n",
+      idx.x, idx.y, idx.z, idx.w);
+}
+
+#endif // DEBUG_MODE
+
+#endif // COMMON_GLSLH
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_bias_load.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_bias_load.glslh
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef LINEAR_BIAS_LOAD_GLSLH
+#define LINEAR_BIAS_LOAD_GLSLH
+
+#include "linear_common.glslh"
+
+VEC4_T load_bias_x4(const uint n4) {
+  return t_bias[n4]; // t_bias is not declared here; the includer must provide it
+}
+
+// Fills bias.data with TILE_N4 consecutive VEC4_T elements of t_bias, starting
+// at element index n4_start. The reads are not bounds-checked.
+void load_bias_tile(out FPPerOutChannelParams bias, const uint n4_start) {
+#if TILE_N4 == 1
+  bias.data[0] = load_bias_x4(n4_start);
+#else
+  [[unroll]] for (int n4 = 0; n4 < TILE_N4; ++n4) {
+    bias.data[n4] = load_bias_x4(n4_start + n4);
+  }
+#endif
+}
+
+#endif // LINEAR_BIAS_LOAD_GLSLH
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/*
+ * Defines common functions and structs to be used across matrix multiplication
+ * operators.
+ */
+
+#ifndef LINEAR_COMMON_GLSLH
+#define LINEAR_COMMON_GLSLH
+
+#include "common.glslh"
+
+// Represents floating point parameter tensors where each element is associated
+// with an output channel, such as weight scales, biases, etc.
+struct FPPerOutChannelParams {
+  VEC4_T data[TILE_N4]; // one VEC4_T per n4 in [0, TILE_N4)
+};
+
+#ifdef DEBUG_MODE
+
+// Debug-only dump of params: prints a "per_out_channel_params:" header line,
+// then one line of four floats for each of the TILE_N4 VEC4_T elements.
+void printFPPerOutChannelParams(const FPPerOutChannelParams params) {
+  debugPrintfEXT("per_out_channel_params: \\n");
+  [[unroll]] for (int i = 0; i < TILE_N4; ++i) {
+    // Read the element once, then print its components.
+    VEC4_T v = params.data[i];
+    debugPrintfEXT(
+        "  %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
+  }
+}
+
+#endif // DEBUG_MODE
+
+#endif // LINEAR_COMMON_GLSLH
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile.glslh
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef LINEAR_FP_INPUT_TILE_GLSLH
+#define LINEAR_FP_INPUT_TILE_GLSLH
+
+/*
+ * Defines the FPInputTile struct, which is used to represent a tile of the
+ * input matrix of a matrix multiplication operation.
+ *
+ * Settings:
+ * - TILE_M: number of rows in the tile
+ * - TILE_K4: number of (groups of 4) columns in the tile
+ */
+
+struct FPInputTile {
+  VEC4_T data[TILE_M][TILE_K4]; // indexed as data[m][k4]
+};
+
+#ifdef DEBUG_MODE
+
+// Debug-only dump of in_tile: prints an "input_tile:" header, then one line
+// of four floats per VEC4_T, with rows outermost and k4 innermost.
+void printFPInputTile(const FPInputTile in_tile) {
+  debugPrintfEXT("input_tile: \\n");
+  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
+    [[unroll]] for (int col = 0; col < TILE_K4; ++col) {
+      // Read the element once, then print its components.
+      VEC4_T v = in_tile.data[row][col];
+      debugPrintfEXT(
+          "  %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
+    }
+  }
+}
+
+#endif // DEBUG_MODE
+
+#endif // LINEAR_FP_INPUT_TILE_GLSLH
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile_load.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile_load.glslh
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/*
+ * Defines functions to load a FPInputTile from input buffer/texture.
+ *
+ * Requires:
+ * - t_input to be declared in the shader layout (input buffer/texture)
+ *
+ * Settings:
+ * - INPUT_BUFFER to indicate input resource is a buffer, otherwise texture is
+ *   assumed.
+ */
+
+#ifndef LINEAR_FP_INPUT_TILE_LOAD_GLSLH
+#define LINEAR_FP_INPUT_TILE_LOAD_GLSLH
+
+#extension GL_EXT_control_flow_attributes : require
+
+#include "linear_fp_input_tile.glslh"
+
+#ifdef INPUT_BUFFER
+// Buffer path: reads t_input at the linear index m * ntexels_k + k4.
+VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
+  uint flat_idx = (m * ntexels_k) + k4;
+  return t_input[flat_idx];
+}
+#else
+// Texture path: fetches (x = k4, y = m, z = 0) at lod 0; ntexels_k is unused.
+VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
+  ivec3 pos = ivec3(k4, m, 0);
+  return texelFetch(t_input, pos, 0);
+}
+#endif // INPUT_BUFFER
+
+// To be used if (M - m_start >= TILE_M) && (K4 - k4_start >= TILE_K4)
+void load_input_tile_no_checks(
+    out FPInputTile in_tile,
+    const uint k4_start,
+    const uint m_start,
+    const uint K4,
+    const uint M) {
+  // Every element is fetched unconditionally; M is not consulted.
+#if TILE_K4 == 1
+  [[unroll]] for (int r = 0; r < TILE_M; ++r) {
+    in_tile.data[r][0] = load_input_x4(k4_start, m_start + r, K4);
+  }
+#else
+  [[unroll]] for (int r = 0; r < TILE_M; ++r) {
+    [[unroll]] for (int c = 0; c < TILE_K4; ++c) {
+      in_tile.data[r][c] = load_input_x4(k4_start + c, m_start + r, K4);
+    }
+  }
+#endif
+}
+
+// Bounds-checked variant for tiles near the tensor edge: any element whose row
+// is >= M (or, when TILE_K4 > 1, whose k4 index is >= K4) is set to zero.
+void load_input_tile_with_checks(
+    out FPInputTile in_tile,
+    const uint k4_start,
+    const uint m_start,
+    const uint K4,
+    const uint M) {
+#if TILE_K4 == 1
+  [[unroll]] for (int r = 0; r < TILE_M; ++r) {
+    uint row = m_start + r;
+    in_tile.data[r][0] = VEC4_T(0.0);
+    if (row < M) {
+      in_tile.data[r][0] = load_input_x4(k4_start, row, K4);
+    }
+  }
+#else
+  [[unroll]] for (int r = 0; r < TILE_M; ++r) {
+    uint row = m_start + r;
+    [[unroll]] for (int c = 0; c < TILE_K4; ++c) {
+      in_tile.data[r][c] = VEC4_T(0.0);
+      if (row < M && k4_start + c < K4) {
+        in_tile.data[r][c] = load_input_x4(k4_start + c, row, K4);
+      }
+    }
+  }
+#endif
+}
+
+#endif // LINEAR_FP_INPUT_TILE_LOAD_GLSLH
diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_fp_output_tile.glslh b/backends/vulkan/runtime/graph/ops/glsl/linear_fp_output_tile.glslh
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/*
+ * Defines the FPOutTile struct, which is used to represent a tile of the output
+ * matrix of a matrix multiplication operation.
+ *
+ * Settings:
+ * - TILE_M: number of rows in the output tile
+ * - TILE_N4: number of (groups of 4) columns in the output tile
+ */
+
+#ifndef LINEAR_FP_OUTPUT_TILE_GLSLH
+#define LINEAR_FP_OUTPUT_TILE_GLSLH
+
+#extension GL_EXT_control_flow_attributes : require
+
+struct FPOutTile {
+  VEC4_T data[TILE_M][TILE_N4]; // indexed as data[m][n4]
+};
+
+// Zero-fills out_tile; column loop is bounded by TILE_N4, FPOutTile's width.
+void initialize(out FPOutTile out_tile) {
+#if TILE_M > 1 && TILE_N4 == 1
+  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
+    out_tile.data[y][0] = VEC4_T(0);
+  }
+#else
+  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
+    [[unroll]] for (int x4 = 0; x4 < TILE_N4; ++x4) {
+      out_tile.data[y][x4] = VEC4_T(0);
+    }
+  }
+#endif
+}
+
+#ifdef DEBUG_MODE
+
+// Debug-only dump of tile: prints an "output_tile:" header, then one line of
+// four floats per VEC4_T, with rows outermost and n4 innermost.
+void printFPOutputTile(const FPOutTile tile) {
+  debugPrintfEXT("output_tile: \\n");
+  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
+    [[unroll]] for (int col = 0; col < TILE_N4; ++col) {
+      // Read the element once, then print its components.
+      VEC4_T v = tile.data[row][col];
+      debugPrintfEXT(
+          "  %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
+    }
+  }
+}
+
+#endif // DEBUG_MODE
+
+#endif // LINEAR_FP_OUTPUT_TILE_GLSLH