@@ -59,45 +59,45 @@ cmake -S "${EXECUTORCH_ROOT}" -B "${BUILD_DIR}" \
 
 cmake --build "${BUILD_DIR}" -j --target run_static_llm_coreml --config Release
 
-# Run the C++ runner with the CPU model
-echo "Running C++ runner with CPU model..."
-RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
-MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
-
-# Run the model and capture full output for debugging
-FULL_OUTPUT=$("${RUNNER}" \
-  --model "${MODEL_DIR}/model_cpu.pte" \
-  --params "${MODEL_DIR}/params.json" \
-  --tokenizer "${MODEL_DIR}/tokenizer.model" \
-  --prompt "Once upon a time," \
-  --max_new_tokens 50 2>&1)
-
-echo "Full output:"
-echo "${FULL_OUTPUT}"
-
-# Check that the model produced meaningful output
-# The output should contain: the prompt "Once upon a time," and the continuation including "there was"
-# Due to log interleaving, we check for individual key parts separately
-if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
-  echo "Output contains expected prompt and generated text"
-  echo "C++ runner test passed!"
-else
-  echo "ERROR: Output does not contain expected text"
-  echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
-  exit 1
-fi
-
-# Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
-echo "Running C++ runner with lookahead decoding..."
-"${RUNNER}" \
-  --model "${MODEL_DIR}/model_cpu.pte" \
-  --params "${MODEL_DIR}/params.json" \
-  --tokenizer "${MODEL_DIR}/tokenizer.model" \
-  --prompt "Once upon a time," \
-  --max_new_tokens 50 \
-  --lookahead
-
-echo "C++ runner lookahead test completed (known issue: produces <unk> tokens)"
+# TODO: enable runner once CoreML bug with caching is fixed
+# # Run the C++ runner with the CPU model
+# echo "Running C++ runner with CPU model..."
+# RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
+# MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
+
+# # Run the model and capture full output for debugging
+# FULL_OUTPUT=$("${RUNNER}" \
+#   --model "${MODEL_DIR}/model.pte" \
+#   --params "${MODEL_DIR}/params.json" \
+#   --tokenizer "${MODEL_DIR}/tokenizer.model" \
+#   --prompt "Once upon a time," \
+#   --max_new_tokens 50 2>&1)
+
+# echo "Full output:"
+# echo "${FULL_OUTPUT}"
+
+# # Check that the model produced meaningful output
+# # The output should contain: the prompt "Once upon a time," and the continuation including "there was"
+# # Due to log interleaving, we check for individual key parts separately
+# if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
+#   echo "Output contains expected prompt and generated text"
+#   echo "C++ runner test passed!"
+# else
+#   echo "ERROR: Output does not contain expected text"
+#   echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
+#   exit 1
+# fi
+
+# TODO: enable runner once CoreML bug with caching is fixed
+# # Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
+# echo "Running C++ runner with lookahead decoding..."
+# "${RUNNER}" \
+#   --model "${MODEL_DIR}/model.pte" \
+#   --params "${MODEL_DIR}/params.json" \
+#   --tokenizer "${MODEL_DIR}/tokenizer.model" \
+#   --prompt "Once upon a time," \
+#   --max_new_tokens 50 \
+#   --lookahead
 
 # Test export of deprecated model
 pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama