Skip to content

Commit e56b81f

Browse files
author
ssjia
committed
Update on "[ET-VK] Add mechanism to trigger command buffer re-encode only when necessary"
## Context Dynamic shape models currently will require the command buffer to be re-encoded every inference. However, this introduces a significant overhead when running models that require dynamic shapes. The reality is that a command buffer re-encode may not be needed every frame. A command buffer re-encode will only be needed when: 1. Shader dispatch parameters change; i.e. new tensor sizes require a completely different compute shader, require new local work group sizing, or require new work group grid size (i.e. global work group size / local work group size) 2. Push constants containing tensor metadata need to be updated This diff aims to reduce the overhead of triggering tensor shape change by detecting when a command buffer re-encode is actually needed. ## Changes `ComputeGraph`: * Introduce `requires_reencode` flag to `ComputeGraph` to indicate when a command buffer re-encode is needed. * Introduce a `std::set<ValueRef>` tracking which values were updated when propagating tensor sizes * "update" can be one of two things: 1) tensor sizes changed 2) symint value changed `DispatchNode`: * When propagating new tensor sizes, only execute the resize function if any of the values participating in the computation have been updated * Mark `requires_reencode` if any push constants associated with tensor metadata need to be updated `DynamicDispatchNode`: * Only recompute compute shader dispatch params if any of the values participating in the computation have been updated * Mark `requires_reencode` if 1) a new compute shader is required, 2) local work group size changed, 3) work group grid size changed Differential Revision: [D79813237](https://our.internmc.facebook.com/intern/diff/D79813237/) [ghstack-poisoned]
2 parents 9152a97 + 4363f93 commit e56b81f

File tree

167 files changed

+3091
-1238
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+3091
-1238
lines changed

.ci/scripts/test_llama_lora.sh

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,17 @@ DOWNLOADED_PATH=$(
4848
--model_id "${HF_MODEL_REPO}" \
4949
--files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
5050
)
51-
EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
52-
# Export model.
51+
# Build llama runner.
52+
cmake_install_executorch_libraries
53+
cmake_build_llama_runner
54+
55+
# Constants.
56+
RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
57+
PROMPT="What happens if you eat watermelon seeds?"
58+
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
59+
60+
# Export LoRA PTE file.
61+
MODEL_NAME="llama_3_2_1B_lora"
5362
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
5463
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
5564
base.params="${DOWNLOADED_PATH}/params.json" \
@@ -61,36 +70,64 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
6170
model.dtype_override="fp32" \
6271
backend.xnnpack.enabled=true \
6372
backend.xnnpack.extended_ops=true \
64-
export.output_name="${EXPORTED_MODEL_NAME}"
65-
66-
# Build llama runner.
67-
cmake_install_executorch_libraries
68-
cmake_build_llama_runner
73+
export.output_name="${MODEL_NAME}.pte"
6974

70-
PROMPT="What happens if you eat watermelon seeds?"
7175
# Run llama runner
72-
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
73-
7476
NOW=$(date +"%H:%M:%S")
7577
echo "Starting to run llama runner at ${NOW}"
7678
# shellcheck source=/dev/null
77-
cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
79+
cmake-out/examples/models/llama/llama_main --model_path=${MODEL_NAME}.pte --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
7880
NOW=$(date +"%H:%M:%S")
7981
echo "Finished at ${NOW}"
8082

8183
RESULT=$(cat result.txt)
82-
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
83-
8484
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
8585
echo "Expected result prefix: ${EXPECTED_PREFIX}"
8686
echo "Actual result: ${RESULT}"
87+
# Do not clean up files if test passes, as they're re-used in the next test.
8788
echo "Success"
88-
cleanup_files
8989
else
9090
echo "Expected result prefix: ${EXPECTED_PREFIX}"
9191
echo "Actual result: ${RESULT}"
9292
echo "Failure; results not the same"
93+
cleanup_files
94+
exit 1
95+
fi
9396

97+
# Export LoRA PTE, PTD file.
98+
MODEL_SEPARATE="${MODEL_NAME}_separate"
99+
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
100+
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
101+
base.params="${DOWNLOADED_PATH}/params.json" \
102+
base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
103+
base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
104+
base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
105+
model.use_kv_cache=true \
106+
model.use_sdpa_with_kv_cache=true \
107+
model.dtype_override="fp32" \
108+
backend.xnnpack.enabled=true \
109+
backend.xnnpack.extended_ops=true \
110+
export.output_name="${MODEL_SEPARATE}.pte" \
111+
export.foundation_weights_file="${MODEL_SEPARATE}.ptd"
112+
113+
# Run llama runner.
114+
NOW=$(date +"%H:%M:%S")
115+
echo "Starting to run llama runner at ${NOW}"
116+
# shellcheck source=/dev/null
117+
cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
118+
NOW=$(date +"%H:%M:%S")
119+
echo "Finished at ${NOW}"
120+
121+
RESULT2=$(cat result2.txt)
122+
if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
123+
echo "Expected result prefix: ${EXPECTED_PREFIX}"
124+
echo "Actual result: ${RESULT2}"
125+
echo "Success"
126+
cleanup_files
127+
else
128+
echo "Expected result prefix: ${EXPECTED_PREFIX}"
129+
echo "Actual result: ${RESULT2}"
130+
echo "Failure; results not the same"
94131
cleanup_files
95132
exit 1
96133
fi

.github/workflows/build-presets.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ on:
66
branches:
77
- main
88
- release/*
9-
paths:
10-
- .github/workflows/build-presets.yml
119
workflow_dispatch:
1210

1311
concurrency:

.github/workflows/trunk.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ jobs:
288288
- test_arm_baremetal: test_models_tosa
289289
- test_arm_baremetal: test_models_ethos-u55
290290
- test_arm_baremetal: test_models_ethos-u85
291+
- test_arm_baremetal: test_smaller_stories_llama
291292
fail-fast: false
292293
with:
293294
runner: linux.2xlarge.memory

.lintrunner.toml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,36 @@ init_command = [
136136
'--requirement=requirements-lintrunner.txt',
137137
]
138138

139+
[[linter]]
140+
code = 'CMAKEFORMAT'
141+
include_patterns = [
142+
"**/*.cmake",
143+
"**/*.cmake.in",
144+
"**/CMakeLists.txt",
145+
]
146+
exclude_patterns = [
147+
'third-party/**',
148+
'**/third-party/**',
149+
]
150+
command = [
151+
'python',
152+
'-m',
153+
'lintrunner_adapters',
154+
'run',
155+
'cmake_format_linter',
156+
'--',
157+
'@{{PATHSFILE}}',
158+
]
159+
init_command = [
160+
'python',
161+
'-m',
162+
'lintrunner_adapters',
163+
'run',
164+
'pip_init',
165+
'--dry-run={{DRYRUN}}',
166+
'--requirement=requirements-lintrunner.txt',
167+
]
168+
139169
[[linter]]
140170
code = 'ETCAPITAL'
141171
include_patterns = [

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,15 +284,19 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
284284
set(TORCHAO_BUILD_CPU_AARCH64 ON)
285285
set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
286286

287-
list(APPEND TORCHAO_INCLUDE_DIRS
287+
list(
288+
APPEND
289+
TORCHAO_INCLUDE_DIRS
288290
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
289291
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
290292
${EXECUTORCH_ROOT}/third-party/ao
291293
)
292294

293295
set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
294296

295-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental)
297+
add_subdirectory(
298+
${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
299+
)
296300
executorch_target_link_options_shared_lib(torchao_ops_executorch)
297301
list(APPEND _executorch_kernels torchao_ops_executorch)
298302
endif()

backends/arm/CMakeLists.txt

Lines changed: 55 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -19,69 +19,71 @@ set(_common_include_directories
1919
)
2020
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
2121

22-
2322
# bare metal backend builds
2423
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
2524

26-
add_compile_options("-Wall" "-Werror")
25+
add_compile_options("-Wall" "-Werror")
2726

28-
# Third-party folder and Ethos-U driver inclued
29-
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
30-
set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include")
31-
include_directories(${DRIVER_ETHOSU_INCLUDE_DIR})
27+
# Third-party folder and Ethos-U driver inclued
28+
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
29+
set(DRIVER_ETHOSU_INCLUDE_DIR
30+
"${THIRD_PARTY_ROOT}/ethos-u-core-driver/include"
31+
)
32+
include_directories(${DRIVER_ETHOSU_INCLUDE_DIR})
3233

33-
set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
34-
backends/arm/runtime/VelaBinStream.cpp
35-
)
36-
list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")
34+
set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
35+
backends/arm/runtime/VelaBinStream.cpp
36+
)
37+
list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")
3738

38-
add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
39-
target_link_libraries(
40-
executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
41-
)
39+
add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
40+
target_link_libraries(
41+
executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
42+
)
4243

43-
install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
44+
install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
4445

45-
# end config for bare metal builds
46+
# end config for bare metal builds
4647
endif()
4748

48-
49-
# VGF backend builds
49+
# VGF backend builds
5050
if(EXECUTORCH_BUILD_VGF)
5151

52-
# include libvgf
53-
set(LIBVGF_PATH "${EXECUTORCH_ROOT}/examples/arm/ethos-u-scratch/ml-sdk-for-vulkan-manifest/sw/vgf-lib/")
54-
55-
set(VULKAN_THIRD_PARTY_PATH ${EXECUTORCH_ROOT}/backends/vulkan/third-party)
56-
set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers/include)
57-
set(VOLK_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/volk)
58-
59-
set(LIBVGF_STATIC "${LIBVGF_PATH}/build/src/libvgf.a")
60-
set(LIBVGF_INCLUDE "${LIBVGF_PATH}/include/")
61-
62-
add_library(vgf STATIC IMPORTED)
63-
set_property( TARGET vgf PROPERTY IMPORTED_LOCATION "${LIBVGF_STATIC}" )
64-
target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}")
65-
66-
# Add backend delegate for VGF
67-
set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp
68-
backends/arm/runtime/VGFSetup.cpp )
69-
70-
# vgf backend
71-
list(TRANSFORM _vgf_backend_sources PREPEND "${EXECUTORCH_ROOT}/")
72-
add_library(vgf_backend ${_vgf_backend_sources})
73-
target_include_directories(
74-
vgf_backend PUBLIC
75-
${_common_include_directories}
76-
${VULKAN_HEADERS_PATH}
77-
${VOLK_HEADERS_PATH}
78-
)
79-
target_compile_options(vgf_backend PRIVATE -DUSE_VULKAN_WRAPPER -DUSE_VULKAN_VOLK)
80-
81-
82-
target_link_libraries(vgf_backend PRIVATE executorch_core)
83-
target_link_libraries(vgf_backend PRIVATE vgf)
84-
executorch_target_link_options_shared_lib(vgf_backend)
85-
86-
# end config for VGF builds
52+
# include libvgf
53+
set(LIBVGF_PATH
54+
"${EXECUTORCH_ROOT}/examples/arm/ethos-u-scratch/ml-sdk-for-vulkan-manifest/sw/vgf-lib/"
55+
)
56+
57+
set(VULKAN_THIRD_PARTY_PATH ${EXECUTORCH_ROOT}/backends/vulkan/third-party)
58+
set(VULKAN_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/Vulkan-Headers/include)
59+
set(VOLK_HEADERS_PATH ${VULKAN_THIRD_PARTY_PATH}/volk)
60+
61+
set(LIBVGF_STATIC "${LIBVGF_PATH}/build/src/libvgf.a")
62+
set(LIBVGF_INCLUDE "${LIBVGF_PATH}/include/")
63+
64+
add_library(vgf STATIC IMPORTED)
65+
set_property(TARGET vgf PROPERTY IMPORTED_LOCATION "${LIBVGF_STATIC}")
66+
target_include_directories(vgf INTERFACE "${LIBVGF_INCLUDE}")
67+
68+
# Add backend delegate for VGF
69+
set(_vgf_backend_sources backends/arm/runtime/VGFBackend.cpp
70+
backends/arm/runtime/VGFSetup.cpp
71+
)
72+
73+
# vgf backend
74+
list(TRANSFORM _vgf_backend_sources PREPEND "${EXECUTORCH_ROOT}/")
75+
add_library(vgf_backend ${_vgf_backend_sources})
76+
target_include_directories(
77+
vgf_backend PUBLIC ${_common_include_directories} ${VULKAN_HEADERS_PATH}
78+
${VOLK_HEADERS_PATH}
79+
)
80+
target_compile_options(
81+
vgf_backend PRIVATE -DUSE_VULKAN_WRAPPER -DUSE_VULKAN_VOLK
82+
)
83+
84+
target_link_libraries(vgf_backend PRIVATE executorch_core)
85+
target_link_libraries(vgf_backend PRIVATE vgf)
86+
executorch_target_link_options_shared_lib(vgf_backend)
87+
88+
# end config for VGF builds
8789
endif()

backends/arm/TARGETS

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,19 @@ python_library(
2121
"//executorch/exir/dialects:lib",
2222
],
2323
)
24+
python_library(
25+
name = "common",
26+
srcs = [
27+
"common/__init__.py",
28+
"common/debug.py",
29+
],
30+
deps = [
31+
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
32+
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
33+
"//caffe2:torch",
34+
"//executorch/exir:lib",
35+
],
36+
)
2437
python_library(
2538
name = "arm_partitioner",
2639
srcs = [

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ python_library(
44
name = "passes",
55
srcs = glob(["*.py"]),
66
deps = [
7+
"//executorch/backends/arm:common",
78
"//executorch/backends/arm:constants",
89
"//executorch/backends/arm:tosa_quant_utils",
910
"//executorch/backends/arm:tosa_utils",

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
3636
from .decompose_div_pass import DecomposeDivPass # noqa
3737
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
38+
from .decompose_expm1_pass import DecomposeExpm1Pass # noqa
3839
from .decompose_gelu_pass import DecomposeGeluPass # noqa
40+
from .decompose_glu_pass import DecomposeGluPass # noqa
3941
from .decompose_grouped_conv import DecomposeGroupedConv # noqa
4042
from .decompose_groupnorm_pass import DecomposeGroupNormPass # noqa
4143
from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@
4040
DecomposeCosineSimilarityPass,
4141
DecomposeDivPass,
4242
DecomposeEmbeddingPass,
43+
DecomposeExpm1Pass,
4344
DecomposeGeluPass,
45+
DecomposeGluPass,
4446
DecomposeGroupedConv,
4547
DecomposeGroupNormPass,
4648
DecomposeLayerNormPass,
@@ -163,6 +165,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
163165
return self._transform(exported_program.graph_module)
164166

165167
def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
168+
self.add_pass(DecomposeExpm1Pass())
166169
self.add_pass(DecomposeMaskedFill())
167170
self.add_pass(DecomposeRoundPass())
168171
self.add_pass(DecomposeAcoshPass())
@@ -184,6 +187,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
184187
self.add_pass(ConvertSplitToSlicePass())
185188
self.add_pass(FuseBatchnorm2DPass(exported_program))
186189
self.add_pass(ConvertMmToBmmPass())
190+
self.add_pass(DecomposeGluPass())
187191
self.add_pass(DecomposeLinearPass())
188192
self.add_pass(DecomposeLeakyReLUPass())
189193
self.add_pass(DecomposeGroupNormPass())
@@ -264,6 +268,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
264268
self.add_pass(DecomposeMeanDimPass(graph_module, self.tosa_spec))
265269
self.add_pass(DecomposeNotEqualPass())
266270
self.add_pass(DecomposeCosineSimilarityPass())
271+
self.add_pass(DecomposeGluPass())
267272
self.add_pass(DecomposeDivPass())
268273
self.add_pass(DecomposeLeakyReLUPass())
269274
self.add_pass(DecomposeLinearVectorNormPass())

0 commit comments

Comments
 (0)