Skip to content

Commit ff8ae0b

Browse files
committed
up
1 parent 14302bf commit ff8ae0b

File tree

2 files changed

+48
-45
lines changed

2 files changed

+48
-45
lines changed

.ci/scripts/test_ane_static_llama.sh

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -59,45 +59,45 @@ cmake -S "${EXECUTORCH_ROOT}" -B "${BUILD_DIR}" \
5959

6060
cmake --build "${BUILD_DIR}" -j --target run_static_llm_coreml --config Release
6161

62-
# Run the C++ runner with the CPU model
63-
echo "Running C++ runner with CPU model..."
64-
RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
65-
MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
66-
67-
# Run the model and capture full output for debugging
68-
FULL_OUTPUT=$("${RUNNER}" \
69-
--model "${MODEL_DIR}/model_cpu.pte" \
70-
--params "${MODEL_DIR}/params.json" \
71-
--tokenizer "${MODEL_DIR}/tokenizer.model" \
72-
--prompt "Once upon a time," \
73-
--max_new_tokens 50 2>&1)
74-
75-
echo "Full output:"
76-
echo "${FULL_OUTPUT}"
77-
78-
# Check that the model produced meaningful output
79-
# The output should contain: the prompt "Once upon a time," and the continuation including "there was"
80-
# Due to log interleaving, we check for individual key parts separately
81-
if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
82-
echo "Output contains expected prompt and generated text"
83-
echo "C++ runner test passed!"
84-
else
85-
echo "ERROR: Output does not contain expected text"
86-
echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
87-
exit 1
88-
fi
89-
90-
# Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
91-
echo "Running C++ runner with lookahead decoding..."
92-
"${RUNNER}" \
93-
--model "${MODEL_DIR}/model_cpu.pte" \
94-
--params "${MODEL_DIR}/params.json" \
95-
--tokenizer "${MODEL_DIR}/tokenizer.model" \
96-
--prompt "Once upon a time," \
97-
--max_new_tokens 50 \
98-
--lookahead
99-
100-
echo "C++ runner lookahead test completed (known issue: produces <unk> tokens)"
62+
# TODO: enable runner once CoreML bug with caching is fixed
63+
# # Run the C++ runner with the CPU model
64+
# echo "Running C++ runner with CPU model..."
65+
# RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
66+
# MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
67+
68+
# # Run the model and capture full output for debugging
69+
# FULL_OUTPUT=$("${RUNNER}" \
70+
# --model "${MODEL_DIR}/model.pte" \
71+
# --params "${MODEL_DIR}/params.json" \
72+
# --tokenizer "${MODEL_DIR}/tokenizer.model" \
73+
# --prompt "Once upon a time," \
74+
# --max_new_tokens 50 2>&1)
75+
76+
# echo "Full output:"
77+
# echo "${FULL_OUTPUT}"
78+
79+
# # Check that the model produced meaningful output
80+
# # The output should contain: the prompt "Once upon a time," and the continuation including "there was"
81+
# # Due to log interleaving, we check for individual key parts separately
82+
# if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
83+
# echo "Output contains expected prompt and generated text"
84+
# echo "C++ runner test passed!"
85+
# else
86+
# echo "ERROR: Output does not contain expected text"
87+
# echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
88+
# exit 1
89+
# fi
90+
91+
# TODO: enable runner once CoreML bug with caching is fixed
92+
# # Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
93+
# echo "Running C++ runner with lookahead decoding..."
94+
# "${RUNNER}" \
95+
# --model "${MODEL_DIR}/model.pte" \
96+
# --params "${MODEL_DIR}/params.json" \
97+
# --tokenizer "${MODEL_DIR}/tokenizer.model" \
98+
# --prompt "Once upon a time," \
99+
# --max_new_tokens 50 \
100+
# --lookahead
101101

102102
# Test export of deprecated model
103103
pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama

examples/apple/coreml/llama/runner/CMakeLists.txt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,20 @@ endif()
7373

7474
# Dependencies for the runner library
7575
set(static_llm_runner_deps
76-
executorch_core extension_data_loader extension_module extension_tensor
77-
extension_flat_tensor extension_llm_runner
76+
executorch_core
77+
extension_data_loader
78+
extension_module
79+
extension_tensor
80+
extension_flat_tensor
81+
extension_llm_runner
82+
executorch_backends
7883
)
7984

8085
# Add CoreML delegate if available (required for running CoreML models) Note:
8186
# coremldelegate is linked transitively through executorch build system when
8287
# EXECUTORCH_BUILD_COREML is ON. We don't need to link it again here to avoid
83-
# duplicate symbol errors.
84-
if(TARGET coremldelegate)
85-
list(APPEND static_llm_runner_deps coremldelegate)
86-
endif()
88+
# duplicate symbol errors. if(TARGET coremldelegate) list(APPEND
89+
# static_llm_runner_deps coremldelegate) endif()
8790

8891
target_link_libraries(static_llm_runner PUBLIC ${static_llm_runner_deps})
8992
target_link_libraries(static_llm_runner PUBLIC tokenizers::tokenizers)

0 commit comments

Comments
 (0)