Commit 1071d8e

Merge branch 'main' into gh/swolchok/140/head

2 parents 6977c03 + 57a09f4

43 files changed: +356 −191 lines changed


.ci/scripts/utils.sh

Lines changed: 3 additions & 3 deletions
@@ -17,17 +17,17 @@ retry () {
 }
 
 clean_executorch_install_folders() {
-  ./install_requirements.sh --clean
+  ./install_executorch.sh --clean
 }
 
 install_executorch() {
   which pip
   # Install executorch, this assumes that Executorch is checked out in the
   # current directory.
   if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then
-    ./install_requirements.sh --pybind xnnpack --use-pt-pinned-commit
+    ./install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
   else
-    ./install_requirements.sh --pybind xnnpack
+    ./install_executorch.sh --pybind xnnpack
   fi
   # Just print out the list of packages for debugging
   pip list

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ on:
     paths:
       - .ci/scripts/setup-ios.sh
       - .github/workflows/apple.yml
-      - install_requirements.sh
+      - install_executorch.sh
       - backends/apple/**
       - build/build_apple_frameworks.sh
       - build/build_apple_llm_demo.sh

.github/workflows/pull.yml

Lines changed: 5 additions & 5 deletions
@@ -200,7 +200,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install Llava requirements
         bash examples/models/llama/install_requirements.sh
@@ -436,7 +436,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install phi-3-mini requirements
         bash examples/models/phi-3-mini/install_requirements.sh
@@ -463,7 +463,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -490,7 +490,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -517,7 +517,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_requirements.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh

backends/apple/mps/setup.md

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successf
 ### [Optional] Run the generated model directly using pybind
 1. Make sure `pybind` MPS support was installed:
 ```bash
-./install_requirements.sh --pybind mps
+./install_executorch.sh --pybind mps
 ```
 2. Run the `mps_example` script to trace the model and run it directly from python:
 ```bash

backends/cadence/build_cadence_fusionG3.sh

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ unset XTENSA_CORE
 export XTENSA_CORE=FCV_FG3GP
 git submodule sync
 git submodule update --init
-./install_requirements.sh
+./install_executorch.sh
 
 rm -rf cmake-out
 
backends/cadence/build_cadence_hifi4.sh

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ unset XTENSA_CORE
 export XTENSA_CORE=nxp_rt600_RI23_11_newlib
 git submodule sync
 git submodule update --init
-./install_requirements.sh
+./install_executorch.sh
 
 rm -rf cmake-out
 

backends/cadence/reference/operators/quantized_conv_out.cpp

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
             if (((_h + d0 * _wh - p0) >= 0) &&
                 ((_h + d0 * _wh - p0) < h) &&
                 ((_w + d1 * _ww - p1) >= 0) &&
-                ((_w + d1 * _ww - p1 < w))) {
+                ((_w + d1 * _ww - p1) < w)) {
               int ioff =
                   (_h + d0 * _wh - p0) * w + (_w + d1 * _ww - p1);
              int woff = _wh * ww + _ww;
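Note that the parenthesization change above does not alter behavior: arithmetic operators bind tighter than `<`, so both spellings of the width check evaluate identically; the rewrite only makes the fourth condition read like the three above it. A minimal standalone C++ sketch (with hypothetical sample values, not taken from the kernel) confirming the equivalence:

```cpp
#include <cassert>

int main() {
  // Hypothetical sample values standing in for the kernel's loop variables.
  int _w = 3, d1 = 2, _ww = 1, p1 = 1, w = 5;

  // Old spelling: extra parentheses wrap the whole comparison.
  bool old_form = ((_w + d1 * _ww - p1 < w));
  // New spelling: parentheses wrap the arithmetic, matching the other checks.
  bool new_form = ((_w + d1 * _ww - p1) < w);

  // '*' and '-' bind tighter than '<', so the two forms always agree.
  assert(old_form == new_form);
  return 0;
}
```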

backends/vulkan/docs/android_demo.md

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ First, build and install ExecuTorch libraries, then build the LLaMA runner
 binary using the Android NDK toolchain.
 
 ```shell
-./install_requirements.sh --clean
+./install_executorch.sh --clean
 (mkdir cmake-android-out && \
   cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \

backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl

Lines changed: 3 additions & 1 deletion
@@ -33,7 +33,9 @@ ${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
 // shared memory to hold calculated positions, this would reduce register usage thus improving performance.
-shared ivec2 pos_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z * TILE_SIZE * TILE_SIZE];
+// 64 is the number of threads in the local wg
+$num_shared = 64 * TILE_SIZE * TILE_SIZE
+shared ivec2 pos_shared[${num_shared}];
 
 /*
  * Computes a 2D pointwise convolution of an NxN output tile. Calculating an
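The change above replaces a shared-array size derived from `gl_WorkGroupSize` with a constant baked in when the shader is generated: 64 threads per workgroup (per the shader comment), each caching one position per element of its TILE_SIZE x TILE_SIZE output tile. A minimal C++ sketch of that size arithmetic, using a placeholder tile size since the real TILE_SIZE is substituted by the codegen step:

```cpp
#include <cstddef>
#include <cstdio>

// Assumed values: 64 comes from the shader comment ("threads in the local wg");
// the tile size here is a placeholder, the real one is injected at codegen time.
constexpr std::size_t kLocalThreads = 64;
constexpr std::size_t kTileSize = 2;

// One cached ivec2 position per thread per output-tile element.
constexpr std::size_t kNumShared = kLocalThreads * kTileSize * kTileSize;

int main() {
  std::printf("pos_shared would hold %zu entries\n", kNumShared);
  return 0;
}
```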

backends/xnnpack/README.md

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@ After exporting the XNNPACK Delegated model, we can now try running it with examp
 cd executorch
 
 # Get a clean cmake-out directory
-./install_requirements.sh --clean
+./install_executorch.sh --clean
 mkdir cmake-out
 
 # Configure cmake
