@@ -59,45 +59,45 @@ cmake -S "${EXECUTORCH_ROOT}" -B "${BUILD_DIR}" \
 
 cmake --build "${BUILD_DIR}" -j --target run_static_llm_coreml --config Release
 
-# Run the C++ runner with the CPU model
-echo "Running C++ runner with CPU model..."
-RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
-MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
-
-# Run the model and capture full output for debugging
-FULL_OUTPUT=$("${RUNNER}" \
-  --model "${MODEL_DIR}/model_cpu.pte" \
-  --params "${MODEL_DIR}/params.json" \
-  --tokenizer "${MODEL_DIR}/tokenizer.model" \
-  --prompt "Once upon a time," \
-  --max_new_tokens 50 2>&1)
-
-echo "Full output:"
-echo "${FULL_OUTPUT}"
-
-# Check that the model produced meaningful output
-# The output should contain: the prompt "Once upon a time," and the continuation including "there was"
-# Due to log interleaving, we check for individual key parts separately
-if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
-  echo "Output contains expected prompt and generated text"
-  echo "C++ runner test passed!"
-else
-  echo "ERROR: Output does not contain expected text"
-  echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
-  exit 1
-fi
-
-# Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
-echo "Running C++ runner with lookahead decoding..."
-"${RUNNER}" \
-  --model "${MODEL_DIR}/model_cpu.pte" \
-  --params "${MODEL_DIR}/params.json" \
-  --tokenizer "${MODEL_DIR}/tokenizer.model" \
-  --prompt "Once upon a time," \
-  --max_new_tokens 50 \
-  --lookahead
-
-echo "C++ runner lookahead test completed (known issue: produces <unk> tokens)"
+# TODO: enable runner once CoreML bug with caching is fixed
+# # Run the C++ runner with the CPU model
+# echo "Running C++ runner with CPU model..."
+# RUNNER="${BUILD_DIR}/examples/apple/coreml/llama/runner/run_static_llm_coreml"
+# MODEL_DIR="${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
+
+# # Run the model and capture full output for debugging
+# FULL_OUTPUT=$("${RUNNER}" \
+#   --model "${MODEL_DIR}/model.pte" \
+#   --params "${MODEL_DIR}/params.json" \
+#   --tokenizer "${MODEL_DIR}/tokenizer.model" \
+#   --prompt "Once upon a time," \
+#   --max_new_tokens 50 2>&1)
+
+# echo "Full output:"
+# echo "${FULL_OUTPUT}"
+
+# # Check that the model produced meaningful output
+# # The output should contain: the prompt "Once upon a time," and the continuation including "there was"
+# # Due to log interleaving, we check for individual key parts separately
+# if [[ "${FULL_OUTPUT}" == *"Once upon a time,"* ]] && [[ "${FULL_OUTPUT}" == *"there"* ]] && [[ "${FULL_OUTPUT}" == *"was"* ]]; then
+#   echo "Output contains expected prompt and generated text"
+#   echo "C++ runner test passed!"
+# else
+#   echo "ERROR: Output does not contain expected text"
+#   echo "Expected: 'Once upon a time,' followed by 'there' and 'was'"
+#   exit 1
+# fi
+
+# TODO: enable runner once CoreML bug with caching is fixed
+# # Run lookahead decoding test (currently produces <unk> tokens on stories, but works with llama)
+# echo "Running C++ runner with lookahead decoding..."
+# "${RUNNER}" \
+#   --model "${MODEL_DIR}/model.pte" \
+#   --params "${MODEL_DIR}/params.json" \
+#   --tokenizer "${MODEL_DIR}/tokenizer.model" \
+#   --prompt "Once upon a time," \
+#   --max_new_tokens 50 \
+#   --lookahead
 
 # Test export of deprecated model
 pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama