Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .ci/scripts/test_llama_torchao_lowbit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ cmake -DPYTHON_EXECUTABLE=python \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .
cmake --build cmake-out -j16 --target install --config Release

# Install llama runner with torchao
cmake -DPYTHON_EXECUTABLE=python \
-DBUILD_TESTING=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
Expand Down
4 changes: 2 additions & 2 deletions .ci/scripts/test_llava.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
Expand Down Expand Up @@ -69,7 +69,7 @@ LLAVA_COMMON_CMAKE_ARGS=" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON"

Expand Down
4 changes: 2 additions & 2 deletions .ci/scripts/test_phi_3_mini.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ cmake_install_executorch_libraries() {
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-B${BUILD_DIR} .

cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
Expand All @@ -43,7 +43,7 @@ cmake_build_phi_3_mini() {
cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/apple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ jobs:
"backend_coreml"
"backend_mps"
"backend_xnnpack"
"kernels_custom"
"kernels_llm"
"kernels_optimized"
"kernels_quantized"
"threadpool"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ jobs:
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_DEVTOOLS=ON \
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-Bcmake-out .
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
endif()

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
if(EXECUTORCH_BUILD_KERNELS_LLM)
# TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
endif()
Expand All @@ -704,7 +704,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
list(APPEND _executor_runner_libs quantized_ops_lib)
endif()

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
if(EXECUTORCH_BUILD_KERNELS_LLM)
list(APPEND _executor_runner_libs $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
endif()

Expand Down
2 changes: 1 addition & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ let products = deliverables([
"c++",
],
],
"kernels_custom": [:],
"kernels_llm": [:],
"kernels_optimized": [
"frameworks": [
"Accelerate",
Expand Down
4 changes: 2 additions & 2 deletions backends/cadence/build_cadence_fusionG3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ if $STEPWISE_BUILD; then
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
-DEXECUTORCH_USE_DL=OFF \
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
-DPYTHON_EXECUTABLE=python3 \
-DEXECUTORCH_FUSION_G3_OPT=ON \
-DHAVE_FNMATCH_H=OFF \
Expand All @@ -72,7 +72,7 @@ else
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
-DEXECUTORCH_USE_DL=OFF \
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
-DPYTHON_EXECUTABLE=python3 \
-DEXECUTORCH_FUSION_G3_OPT=ON \
-DHAVE_FNMATCH_H=OFF \
Expand Down
4 changes: 2 additions & 2 deletions backends/cadence/build_cadence_hifi4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ if $STEPWISE_BUILD; then
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
-DEXECUTORCH_USE_DL=OFF \
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
-DPYTHON_EXECUTABLE=python3 \
-DEXECUTORCH_NNLIB_OPT=ON \
-DHAVE_FNMATCH_H=OFF \
Expand All @@ -70,7 +70,7 @@ else
-DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
-DEXECUTORCH_USE_DL=OFF \
-DEXECUTORCH_BUILD_PORTABLE_OPS=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
-DEXECUTORCH_BUILD_KERNELS_LLM=OFF \
-DPYTHON_EXECUTABLE=python3 \
-DEXECUTORCH_NNLIB_OPT=ON \
-DHAVE_FNMATCH_H=OFF \
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/docs/android_demo.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ binary using the Android NDK toolchain.
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_VULKAN=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DPYTHON_EXECUTABLE=python \
-Bcmake-android-out && \
cmake --build cmake-android-out -j16 --target install)
Expand All @@ -102,7 +102,7 @@ binary using the Android NDK toolchain.
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=$ANDROID_ABI \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DPYTHON_EXECUTABLE=python \
-Bcmake-android-out/examples/models/llama && \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <pat
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-android-out .

cmake --build cmake-android-out -j16 --target install --config Release
Expand All @@ -73,7 +73,7 @@ llama3/Meta-Llama-3-8B-Instruct/tokenizer.model -p <path_to_params.json> -c <pat
-DEXECUTORCH_BUILD_QNN=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-android-out/examples/models/llama examples/models/llama

cmake --build cmake-android-out/examples/models/llama -j16 --config Release
Expand Down
2 changes: 1 addition & 1 deletion docs/source/using-executorch-building-from-source.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ cmake . `
-DEXECUTORCH_BUILD_FLATC=ON `
-DEXECUTORCH_BUILD_PYBIND=OFF `
-DEXECUTORCH_BUILD_XNNPACK=ON `
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON `
-DEXECUTORCH_BUILD_KERNELS_LLM=ON `
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON `
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON `
-DEXECUTORCH_ENABLE_LOGGING=ON `
Expand Down
2 changes: 1 addition & 1 deletion docs/source/using-executorch-ios.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ The ExecuTorch Runtime for iOS and macOS (ARM64) is distributed as a collection
* `backend_coreml` - Core ML backend
* `backend_mps` - MPS backend
* `backend_xnnpack` - XNNPACK backend
* `kernels_custom` - Custom kernels for LLMs
* `kernels_llm` - Custom kernels for LLMs
* `kernels_optimized` - Accelerated generic CPU kernels
* `kernels_quantized` - Quantized kernels

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ cmake -DPYTHON_EXECUTABLE=python \
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .
cmake --build cmake-out -j16 --target install --config Release
```
Expand All @@ -86,7 +86,7 @@ cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_QNN=ON \
-Bcmake-out/examples/models/llama \
Expand Down Expand Up @@ -238,4 +238,4 @@ If the app successfully run on your device, you should see something like below:
</p>

## Reporting Issues
If you encounter any bugs or issues while following this tutorial, please file a bug/issue here on GitHub.
If you encounter any bugs or issues while following this tutorial, please file a bug/issue here on GitHub.
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
03CF439E2CEC5CEC00C7113B /* backend_xnnpack in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF439D2CEC5CEC00C7113B /* backend_xnnpack */; };
03CF43A02CEC5CEC00C7113B /* backend_xnnpack_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF439F2CEC5CEC00C7113B /* backend_xnnpack_debug */; };
03CF43A22CEC5CEC00C7113B /* executorch_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A12CEC5CEC00C7113B /* executorch_debug */; };
03CF43A42CEC5CEC00C7113B /* kernels_custom in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A32CEC5CEC00C7113B /* kernels_custom */; };
03CF43A62CEC5CEC00C7113B /* kernels_custom_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A52CEC5CEC00C7113B /* kernels_custom_debug */; };
03CF43A42CEC5CEC00C7113B /* kernels_llm in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A32CEC5CEC00C7113B /* kernels_llm */; };
03CF43A62CEC5CEC00C7113B /* kernels_llm_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A52CEC5CEC00C7113B /* kernels_llm_debug */; };
03CF43A82CEC5CEC00C7113B /* kernels_optimized in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A72CEC5CEC00C7113B /* kernels_optimized */; };
03CF43AA2CEC5CEC00C7113B /* kernels_optimized_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43A92CEC5CEC00C7113B /* kernels_optimized_debug */; };
03CF43B02CEC5CEC00C7113B /* kernels_quantized in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43AF2CEC5CEC00C7113B /* kernels_quantized */; };
Expand Down Expand Up @@ -185,8 +185,8 @@
03CF439E2CEC5CEC00C7113B /* backend_xnnpack in Frameworks */,
03CF43A02CEC5CEC00C7113B /* backend_xnnpack_debug in Frameworks */,
03CF43A22CEC5CEC00C7113B /* executorch_debug in Frameworks */,
03CF43A42CEC5CEC00C7113B /* kernels_custom in Frameworks */,
03CF43A62CEC5CEC00C7113B /* kernels_custom_debug in Frameworks */,
03CF43A42CEC5CEC00C7113B /* kernels_llm in Frameworks */,
03CF43A62CEC5CEC00C7113B /* kernels_llm_debug in Frameworks */,
03CF43A82CEC5CEC00C7113B /* kernels_optimized in Frameworks */,
03CF43AA2CEC5CEC00C7113B /* kernels_optimized_debug in Frameworks */,
03CF43B02CEC5CEC00C7113B /* kernels_quantized in Frameworks */,
Expand Down Expand Up @@ -481,8 +481,8 @@
03CF439D2CEC5CEC00C7113B /* backend_xnnpack */,
03CF439F2CEC5CEC00C7113B /* backend_xnnpack_debug */,
03CF43A12CEC5CEC00C7113B /* executorch_debug */,
03CF43A32CEC5CEC00C7113B /* kernels_custom */,
03CF43A52CEC5CEC00C7113B /* kernels_custom_debug */,
03CF43A32CEC5CEC00C7113B /* kernels_llm */,
03CF43A52CEC5CEC00C7113B /* kernels_llm_debug */,
03CF43A72CEC5CEC00C7113B /* kernels_optimized */,
03CF43A92CEC5CEC00C7113B /* kernels_optimized_debug */,
03CF43AF2CEC5CEC00C7113B /* kernels_quantized */,
Expand Down Expand Up @@ -948,7 +948,7 @@
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/pytorch/executorch";
requirement = {
branch = "swiftpm-0.8.0.20250702";
branch = "swiftpm-0.8.0.20250711";
kind = branch;
};
};
Expand Down Expand Up @@ -990,15 +990,15 @@
package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
productName = executorch_debug;
};
03CF43A32CEC5CEC00C7113B /* kernels_custom */ = {
03CF43A32CEC5CEC00C7113B /* kernels_llm */ = {
isa = XCSwiftPackageProductDependency;
package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
productName = kernels_custom;
productName = kernels_llm;
};
03CF43A52CEC5CEC00C7113B /* kernels_custom_debug */ = {
03CF43A52CEC5CEC00C7113B /* kernels_llm_debug */ = {
isa = XCSwiftPackageProductDependency;
package = 03CF43942CEC5CEC00C7113B /* XCRemoteSwiftPackageReference "executorch" */;
productName = kernels_custom_debug;
productName = kernels_llm_debug;
};
03CF43A72CEC5CEC00C7113B /* kernels_optimized */ = {
isa = XCSwiftPackageProductDependency;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ OTHER_LDFLAGS = $(inherited) \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_llm_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_debug_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_debug_$(ET_PLATFORM).a \
@$(TEMP_DIR)/cmake/linker_flags
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ OTHER_LDFLAGS = $(inherited) \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_coreml_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_mps_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libbackend_xnnpack_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_custom_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_llm_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_optimized_$(ET_PLATFORM).a \
-force_load $(BUILT_PRODUCTS_DIR)/libkernels_quantized_$(ET_PLATFORM).a \
@$(TEMP_DIR)/cmake/linker_flags
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ The following packages should be linked in your app target `LLaMA` (left side, L
- backend_coreml
- backend_mps
- backend_xnnpack
- kernels_custom
- kernels_llm
- kernels_optimized
- kernels_portable
- kernels_quantized
Expand Down
8 changes: 4 additions & 4 deletions examples/models/llama/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out-android .

cmake --build cmake-out-android -j16 --target install --config Release
Expand All @@ -301,7 +301,7 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DSUPPORT_REGEX_LOOKAHEAD=ON
-Bcmake-out-android/examples/models/llama \
examples/models/llama
Expand Down Expand Up @@ -386,7 +386,7 @@ cmake -DPYTHON_EXECUTABLE=python \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .
cmake --build cmake-out -j16 --target install --config Release
```
Expand All @@ -396,7 +396,7 @@ Next install the llama runner with torchao kernels enabled (similar to step 3.2
```
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ endif()
target_link_options_shared_lib(quantized_ops_lib)
list(APPEND link_libraries quantized_kernels quantized_ops_lib)

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
if(EXECUTORCH_BUILD_KERNELS_LLM)
list(APPEND link_libraries $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
endif()

Expand Down
4 changes: 2 additions & 2 deletions examples/models/phi-3-mini/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ python -m examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-m
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .

cmake --build cmake-out -j16 --target install --config Release
Expand All @@ -42,7 +42,7 @@ python -m examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-m
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
Expand Down
2 changes: 1 addition & 1 deletion extension/android/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ if(TARGET vulkan_backend)
list(APPEND link_libraries vulkan_backend)
endif()

if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
if(EXECUTORCH_BUILD_KERNELS_LLM)
list(APPEND link_libraries $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
endif()

Expand Down
Loading
Loading