diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index f42a20e22..87e0825b3 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -434,7 +434,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10.11
+          python-version: '3.10.11'
       - name: Setup Xcode
         if: runner.os == 'macOS'
         uses: maxim-lobanov/setup-xcode@v1
@@ -577,7 +577,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10.11
+          python-version: '3.10.11'
       - name: Print machine info
         run: |
           uname -a
@@ -625,6 +625,7 @@ jobs:
     with:
       runner: macos-m1-stable # needs MPS, was macos-m1-stable
      script: |
+        export PYTHON_VERSION="3.10"
        set -x
        # NS/MC: Remove previous installation of torch and torchao first
        # as this script does not install anything into conda env but rather as system dep
@@ -737,6 +738,7 @@ jobs:
    with:
      runner: macos-m1-stable # needs MPS, was macos-m1-stable
      script: |
+        export PYTHON_VERSION="3.10"
        set -x
        # NS/MC: Remove previous installation of torch and torchao first
        # as this script does not install anything into conda env but rather as system dep
@@ -914,31 +916,19 @@ jobs:
        continue-on-error: true
        run: |
          echo "Installing ExecuTorch"
-          bash torchchat/utils/scripts/build_native.sh et
-      - name: Install ET pip
+          bash torchchat/utils/scripts/install_et.sh
+      - name: Install ExecuTorch python
        run: |
-          echo "ET build directory"
-          ls et-build | cat
-
+          echo "Install ExecuTorch python"
          pushd et-build/src/executorch
-          if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then
-            echo "Mismatched hash. Make sure branch install_et.sh matches branch from Github cache."
-            echo "On commit $(git rev-parse HEAD)"
-            echo "Expected commit ${{env.et-git-hash}}"
-            exit 1
-          fi
-          pip install .
+          chmod +x ./install_requirements.sh
+          chmod +x ./install_requirements.py
+          ./install_requirements.sh
          popd
      - name: Install runner
        run: |
-          # Pull submodules (re2, abseil) for Tiktoken
-          git submodule sync
-          git submodule update --init
-
-          export TORCHCHAT_ROOT=${PWD}
-          cmake -S . -B ./cmake-out -G Ninja
-          cmake --build ./cmake-out --target et_run
-
+          echo "Installing runner"
+          bash torchchat/utils/scripts/build_native.sh et
      - name: Run inference
        run: |
          python torchchat.py download stories15M
diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt
index a6f1373dd..01c77f102 100644
--- a/install/.pins/et-pin.txt
+++ b/install/.pins/et-pin.txt
@@ -1 +1 @@
-91298923a0076c1b41059efb6dad2876426e4b03
+af098c31b6f8d5f38e40a5cf35784b0969d97df8
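The workflow change above splits what was a single build_native.sh call (which previously cloned and built ExecuTorch as a side effect) into three explicit steps. A minimal sketch of the equivalent local flow, assuming a torchchat checkout as the working directory:

    # 1. Clone ExecuTorch at the pinned commit and build/install its libs into et-build/
    bash torchchat/utils/scripts/install_et.sh
    # 2. Install ExecuTorch's python requirements from the checkout
    (cd et-build/src/executorch && ./install_requirements.sh)
    # 3. Build the native et_run runner against the installed libs
    bash torchchat/utils/scripts/build_native.sh et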
diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index b698315ff..47fd5b36d 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -47,10 +47,10 @@ fi
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20240814
+PYTORCH_NIGHTLY_VERSION=dev20240901

 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20240814
+VISION_NIGHTLY_VERSION=dev20240901

 # Nightly version for torchtune
 TUNE_NIGHTLY_VERSION=dev20240916
diff --git a/runner/build_android.sh b/runner/build_android.sh
index c32185957..c0ad02d7b 100755
--- a/runner/build_android.sh
+++ b/runner/build_android.sh
@@ -24,8 +24,6 @@ export CMAKE_OUT_DIR="cmake-out-android"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON"
 export CMAKE_OUT_DIR="cmake-out-android"
-# export DCMAKE_INSTALL_PREFIX=cmake-out-android
-#

 build_runner_et() {
   rm -rf cmake-out-android
@@ -43,5 +41,5 @@ install_executorch_python_libs $ENABLE_ET_PYBIND
 export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake
 export ANDROID_ABI=arm64-v8a
 export ANDROID_PLATFORM=android-23
-install_executorch
+install_executorch_cpp_libs
 build_runner_et
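With the rename above, the Android build path only compiles the ExecuTorch C++ libraries (install_executorch_cpp_libs); the python libs are installed earlier via install_executorch_python_libs. A hedged sketch of the invocation, assuming an NDK install at an illustrative path:

    export ANDROID_NDK=/path/to/android-ndk   # assumption: your NDK location
    export TORCHCHAT_ROOT=${PWD}              # run from the torchchat checkout
    bash runner/build_android.sh              # cross-compiles et_run for arm64-v8a, android-23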
diff --git a/runner/et.cmake b/runner/et.cmake
index 7fc16b1f2..99e67a025 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -62,13 +62,15 @@ if(executorch_FOUND)
   set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch)
   set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack)
-  list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
+  list(APPEND _srcs ${EXECUTORCH_SRC_ROOT}/extension/threadpool/cpuinfo_utils.cpp)
   list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/cpuinfo/include)
   list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/pthreadpool/include)
   endif()

+  add_library(custom_ops STATIC IMPORTED)
+  set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a)
   target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
   add_executable(et_run ${_srcs})
@@ -80,7 +82,9 @@ if(executorch_FOUND)
     et_run PRIVATE
     executorch
     extension_module
+    extension_tensor
     extension_data_loader
+    extension_threadpool
     optimized_kernels
     quantized_kernels
     portable_kernels
@@ -93,10 +97,13 @@ if(executorch_FOUND)
     XNNPACK
     pthreadpool
     cpuinfo
+    custom_ops
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
+  target_link_options_shared_lib(custom_ops)
+
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -104,28 +111,11 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()

-  target_link_libraries(et_run PRIVATE
-    "$")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)
   endif()

-  # Adding target_link_options_shared_lib as commented out below leads to this:
-  #
-  # CMake Error at Utils.cmake:22 (target_link_options):
-  #   Cannot specify link options for target
-  #   "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
-  #   which is not built by this project.
-  # Call Stack (most recent call first):
-  #   Utils.cmake:30 (macos_kernel_link_options)
-  #   CMakeLists.txt:41 (target_link_options_shared_lib)
-  #
-  #target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch
-
-  # This works on mac, but appears to run into issues on linux
-  # It is needed to solve:
-  # E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
 else()
   MESSAGE(WARNING "ExecuTorch package not found")
 endif()
diff --git a/runner/run.cpp b/runner/run.cpp
index 999ad2fcc..52d596749 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -39,19 +39,20 @@ torch::Device aoti_device(torch::kCPU);
 #else // __ET_MODEL__
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 

 #if defined(ET_USE_ADAPTIVE_THREADS)
-#include 
-#include 
+#include 
+#include 
 #endif

 using exec_aten::ScalarType;
 using torch::executor::EValue;
-using torch::executor::ManagedTensor;
+using executorch::extension::TensorPtr;
+using executorch::extension::make_tensor_ptr;
 using torch::executor::Module;
 using torch::executor::Result;
 #endif
@@ -212,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) {
       .to(torch::kCPU);
   auto logits = result[0].data_ptr();
 #else // __ET_MODEL__
-  ManagedTensor pos_managed(pos_buffer, {1}, ScalarType::Long);
-  ManagedTensor tokens_managed(token_buffer, {1, 1}, ScalarType::Long);
+  TensorPtr pos_managed = make_tensor_ptr({1}, pos_buffer, ScalarType::Long);
+  TensorPtr tokens_managed = make_tensor_ptr({1, 1}, token_buffer, ScalarType::Long);
   std::vector inputs;
-  auto tmp1 = EValue(tokens_managed.get_aliasing_tensor());
-  auto tmp2 = EValue(pos_managed.get_aliasing_tensor());
+  auto tmp1 = EValue(tokens_managed);
+  auto tmp2 = EValue(pos_managed);
   inputs.push_back(tmp1);
   inputs.push_back(tmp2);
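Two related fixes land here: et.cmake now imports the prebuilt custom-ops archive and links it whole-archive so the llama::sdpa_with_kv_cache.out operator registers at load time (the deleted comment block documents the "Missing operator" failure this addresses), and run.cpp migrates from the removed ManagedTensor wrapper to the TensorPtr/make_tensor_ptr API. A quick sanity check after rebuilding, assuming the default et-build layout:

    # the archive et.cmake imports must exist at the IMPORTED_LOCATION path
    ls "${TORCHCHAT_ROOT}/et-build/install/lib/libcustom_ops.a"
    # then reconfigure and rebuild the runner target
    cmake --build ./cmake-out --target et_run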
diff --git a/torchchat/export.py b/torchchat/export.py
index affb8b871..b28e8023f 100644
--- a/torchchat/export.py
+++ b/torchchat/export.py
@@ -194,7 +194,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None):
         return self.wo(output)

 def replace_attention_with_custom_sdpa_attention(module: nn.Module):
-    from executorch.examples.models.llama2.custom_ops import ( # noqa
+    from executorch.extension.llm.custom_ops import ( # noqa
         sdpa_with_kv_cache,
     )
@@ -304,7 +304,6 @@ def export_for_et(model, device, output_path) -> str:
     edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())
     export_program = edge_manager.to_executorch(
         ExecutorchBackendConfig(
-            extract_constant_segment=True,
             extract_delegate_segments=True,
             passes=[
                 QuantFusionPass(),
diff --git a/torchchat/model.py b/torchchat/model.py
index 79bd1f188..81c06e495 100644
--- a/torchchat/model.py
+++ b/torchchat/model.py
@@ -932,7 +932,10 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor:
     from executorch.extension.pybindings import portable_lib as exec_lib

     # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately.
-    from executorch.examples.models.llama2.custom_ops import sdpa_with_kv_cache # no-qa
+    # For quantized_decomposed ops
+    from executorch.kernels import quantized # noqa
+    # For llama::sdpa_with_kv_cache.out, preprocess ops
+    from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # noqa

     class PTEModel(nn.Module):
         def __init__(self, config, path) -> None:
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index aacd97415..924b86a65 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -25,6 +25,7 @@ if [ $# -eq 0 ]; then
   show_help
   exit 1
 fi
+
 while (( "$#" )); do
   case "$1" in
     -h|--help)
@@ -49,15 +50,7 @@ while (( "$#" )); do
   esac
 done

-if [ -z "${TORCHCHAT_ROOT}" ]; then
-  # Get the absolute path of the current script
-  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-  # Get the absolute path of the parent directory
-  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
-  source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
-else
-  source "$TORCHCHAT_ROOT/torchchat/utils/scripts/install_utils.sh"
-fi
+source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh"

 if [ -z "${ET_BUILD_DIR}" ]; then
   ET_BUILD_DIR="et-build"
@@ -68,16 +61,17 @@ pushd ${TORCHCHAT_ROOT}
 git submodule update --init
 git submodule sync
 if [[ "$TARGET" == "et" ]]; then
-  find_cmake_prefix_path
-  install_pip_dependencies
-  clone_executorch
-  install_executorch_libs false
+  if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" ]; then
+    echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install does not exist."
+    echo "Make sure you run install_executorch_libs"
+    exit 1
+  fi
 fi
 popd

 # CMake commands
 if [[ "$TARGET" == "et" ]]; then
-  cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
+  cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
 else
   cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
 fi
diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh
index 22c3ac80a..1d8c6e2b2 100755
--- a/torchchat/utils/scripts/install_et.sh
+++ b/torchchat/utils/scripts/install_et.sh
@@ -17,7 +17,7 @@ ENABLE_ET_PYBIND="${1:-true}"

 pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
-install_pip_dependencies
 clone_executorch
 install_executorch_libs $ENABLE_ET_PYBIND
+install_executorch_python_libs $ENABLE_ET_PYBIND
 popd
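install_et.sh keeps its single positional pybind toggle and now finishes by installing the ExecuTorch python libs, while build_native.sh no longer clones or builds ExecuTorch itself and instead fails fast when the install tree is missing. Expected usage, sketched under the default ET_BUILD_DIR=et-build:

    bash torchchat/utils/scripts/install_et.sh        # ENABLE_ET_PYBIND defaults to true
    bash torchchat/utils/scripts/install_et.sh false  # or: skip the pybind build
    bash torchchat/utils/scripts/build_native.sh et   # errors out unless et-build/install exists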
-d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}" ]; then echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR} does not exist." @@ -102,9 +105,10 @@ COMMON_CMAKE_ARGS="\ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON" + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" -install_executorch() { +install_executorch_cpp_libs() { # AOT lib has to be build for model export # So by default it is built, and you can explicitly opt-out EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF @@ -144,7 +148,6 @@ install_executorch() { -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=${EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR} \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=${EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR} \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ ${CROSS_COMPILE_ARGS} \ -S . -B ${CMAKE_OUT_DIR} -G Ninja cmake --build ${CMAKE_OUT_DIR} @@ -153,12 +156,8 @@ install_executorch() { } install_executorch_libs() { - # Install executorch python and C++ libs - export CMAKE_ARGS="\ - ${COMMON_CMAKE_ARGS} \ - -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" - export CMAKE_BUILD_ARGS="--target install" - + EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF + EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR=OFF + install_executorch_cpp_libs install_executorch_python_libs $1 }