Commit 1c48c20
Update base for Update on "[ET-VK][ez] Fix Vulkan Validation layer errors due to consecutive command buffer encoding"
## Changes

* In `VulkanBackend.cpp`, do not call `encode_execute()` during model load if the model's compile spec specifies `requires_dynamic_shapes` as true.
* In test files, do not call `encode_execute()` if `propagate_resize()` is subsequently called.

## Motivation

Recently, it was discovered that a command buffer re-encode is required to update push constant values. This means that for dynamic shapes to work correctly, `encode_execute()` must be called after updating tensor sizes. As a result, `propagate_resize()` now calls `encode_execute()` internally.

This creates scenarios where `encode_execute()` is called once during model load, then again right before the first inference during `propagate_resize()`, without the command buffer actually being executed in between. Because the first command buffer never executed, the last-access information of image/buffer resources is inaccurate during the second encoding, which causes Validation layer errors like:

```
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x24086224ec0, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x88d2b500000000e2, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x4dae5635 | vkQueueSubmit(): pSubmits[0].pCommandBuffers[0] command buffer VkCommandBuffer 0x24086224ec0[] expects VkImage 0x88d2b500000000e2[] (subresource: aspectMask VK_IMAGE_ASPECT_COLOR_BIT array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL--instead, current layout is VK_IMAGE_LAYOUT_UNDEFINED.
    Objects: 2
        [0] 0x24086224ec0, type: 6, name: NULL
        [1] 0x88d2b500000000e2, type: 10, name: NULL
UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout(ERROR / SPEC): msgNum: 1303270965 - Validation Error: [ UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout ] Object 0: handle = 0x24086224ec0, type = VK_OBJECT_TYPE_COMMAND_BUFFER; Object 1: handle = 0x6caffc00000000e3, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 0x4dae5635 | vkQueueSubmit(): pSubmits[0].pCommandBuffers[0] command buffer VkCommandBuffer 0x24086224ec0[] expects VkImage 0x6caffc00000000e3[] (subresource: aspectMask VK_IMAGE_ASPECT_COLOR_BIT array layer 0, mip level 0) to be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL--instead, current layout is VK_IMAGE_LAYOUT_UNDEFINED.
    Objects: 2
        [0] 0x24086224ec0, type: 6, name: NULL
        [1] 0x6caffc00000000e3, type: 10, name: NULL
```

## Perf Impact

* Performance improvement for the first inference of dynamic-shape models when actual tensor sizes are much smaller than the maximum possible sizes.
* No impact for non-dynamic-shape models.

Differential Revision: [D76047203](https://our.internmc.facebook.com/intern/diff/D76047203/)

cc manuelcandales cbilgin

[ghstack-poisoned]
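To make the sequencing concrete, below is a minimal C++ sketch of the encode/execute flow the commit message describes. It is illustrative only, not the ExecuTorch source: the `ComputeGraph` struct, the `load_model`/`run_inference` wrappers, and the `requires_dynamic_shapes` member are hypothetical stand-ins; only the relationship between `encode_execute()` and `propagate_resize()` is taken from the commit message.

```cpp
#include <iostream>

// Hypothetical stand-in for the Vulkan delegate's compute graph; only the
// encode/resize/execute relationship mirrors the behavior described above.
struct ComputeGraph {
  bool requires_dynamic_shapes = false;

  // Records all compute commands into a fresh command buffer.
  void encode_execute() {
    std::cout << "encode command buffer\n";
  }

  // Updates tensor sizes, then re-encodes so updated push constant values
  // are captured. Per the commit message, propagate_resize() now calls
  // encode_execute() internally.
  void propagate_resize() {
    std::cout << "update tensor sizes\n";
    encode_execute();
  }

  // Submits the most recently encoded command buffer.
  void execute() {
    std::cout << "submit command buffer\n";
  }
};

// Model load: skip the eager encode for dynamic-shape models, since
// propagate_resize() will encode right before the first inference anyway.
// Encoding twice without executing in between leaves the resources'
// last-access tracking stale, which triggered the validation errors.
void load_model(ComputeGraph& graph) {
  if (!graph.requires_dynamic_shapes) {
    graph.encode_execute();
  }
}

void run_inference(ComputeGraph& graph) {
  if (graph.requires_dynamic_shapes) {
    graph.propagate_resize();  // exactly one encode before the first submit
  }
  graph.execute();
}

int main() {
  ComputeGraph graph;
  graph.requires_dynamic_shapes = true;
  load_model(graph);     // no encode here for dynamic shapes
  run_inference(graph);  // encode happens once, inside propagate_resize()
}
```

With the guard in place, a dynamic-shape model is encoded exactly once before its first submission, so image layout and last-access state never refer to a command buffer that was never executed.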
1 parent bfc6dfb commit 1c48c20

31 files changed: +522 −696 lines

.ci/scripts/build_llama_android.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -42,7 +42,6 @@ build_llama_runner() {
   popd
   ANDROID_ABI=arm64-v8a
   cmake -DBUCK2="${BUCK2}" \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
```

.ci/scripts/test_llama.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -169,7 +169,6 @@ cmake_build_llama_runner() {
   popd
   dir="examples/models/llama"
   retry cmake \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -Bcmake-out/${dir} \
```

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 0 additions & 1 deletion

```diff
@@ -40,7 +40,6 @@ cmake --build cmake-out -j16 --target install --config Release

 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
```

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 2 deletions

```diff
@@ -64,10 +64,9 @@ cmake_install_executorch_libraries_for_android() {


 LLAVA_COMMON_CMAKE_ARGS=" \
-    -DBUILD_TESTING=OFF \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON"
```

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 7 additions & 3 deletions

```diff
@@ -3069,8 +3069,10 @@ void test_to_copy() {

   EXPECT_EQ(data_in.size(), output_data.size());

+#ifdef VULKAN_DEBUG
   float mse_ex = 0.0f;
   float mse_vk = 0.0f;
+#endif

   // check results
   for (size_t i = 0; i < output_data.size(); ++i) {
@@ -3092,6 +3094,9 @@
         std::bitset<16>(*output_bits).to_string() + ")";

     std::cout << msg << std::endl;
+
+    mse_ex += std::pow(expected_output - input, 2);
+    mse_vk += std::pow(output - input, 2);
 #endif

     // Note: Torch executor half "rounds up" when converting to fp16 whereas
@@ -3113,13 +3118,12 @@
     EXPECT_TRUE(
         (*output_bits == *expected_bits) ||
         /*rounding error*/ ((*output_bits + 1u) == *expected_bits));
-    mse_ex += std::pow(expected_output - input, 2);
-    mse_vk += std::pow(output - input, 2);
   }

+#ifdef VULKAN_DEBUG
   mse_ex /= output_data.size();
   mse_vk /= output_data.size();
-#ifdef VULKAN_DEBUG
+
   std::cout << "========================================================="
             << std::endl;
   std::cout << "mse_ex = " << mse_ex << ", mse_vk = " << mse_vk << std::endl;
```

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 2 additions & 3 deletions

```diff
@@ -14,7 +14,6 @@

 using executorch::extension::llm::GenerationConfig;
 using executorch::extension::llm::Image;
-using executorch::extension::llm::TextLLMRunner;
 using executorch::runtime::Error;

 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -24,15 +23,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
 @end

 @implementation LLaMARunner {
-  std::unique_ptr<TextLLMRunner> _runner;
+  std::unique_ptr<example::Runner> _runner;
 }

 - (instancetype)initWithModelPath:(NSString*)modelPath
                     tokenizerPath:(NSString*)tokenizerPath {
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::create_llama_runner(
+    _runner = example::Runner::create(
         modelPath.UTF8String, tokenizerPath.UTF8String);
   }
   return self;
```

examples/models/llama/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -220,6 +220,7 @@ endif()
 target_include_directories(
   llama_main
   PUBLIC ${_common_include_directories}
+         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
```

examples/models/llama/main.cpp

Lines changed: 2 additions & 7 deletions

```diff
@@ -81,13 +81,8 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_path);
-
-  if (runner == nullptr) {
-    ET_LOG(Error, "Failed to create llama runner");
-    return 1;
-  }
+  std::unique_ptr<example::Runner> runner =
+      example::Runner::create(model_path, tokenizer_path, data_path);

   if (warmup) {
     runner->warmup(prompt, /*max_new_tokens=*/seq_len);
```

examples/models/llama/runner/CMakeLists.txt

Lines changed: 12 additions & 9 deletions

```diff
@@ -52,20 +52,23 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()

-# For extension_llm_runner
-if(NOT TARGET extension_llm_runner)
-  add_subdirectory(
-    ${EXECUTORCH_ROOT}/extension/llm/runner
-    ${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
-  )
-endif()
-
 set(llama_runner_deps executorch_core extension_data_loader extension_module
-    extension_tensor extension_flat_tensor extension_llm_runner
+    extension_tensor extension_flat_tensor
 )

 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

+target_include_directories(
+  llama_runner
+  INTERFACE ${_common_include_directories}
+)
+
+# Include tokenizers dependency
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers
+  ${CMAKE_CURRENT_BINARY_DIR}/tokenizers
+)
 target_link_libraries(
   llama_runner PUBLIC tokenizers
 )
```
