
Commit 5044039

Add test for eager torchtune llama runner
1 parent b74e2c3 commit 5044039

File tree

2 files changed: +94 −0 lines changed

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

ENABLE_KV_CACHE="${1:-false}"

if [[ "${ENABLE_KV_CACHE}" != "true" && "${ENABLE_KV_CACHE}" != "false" ]]; then
  echo "Error: ENABLE_KV_CACHE must be 'true' or 'false'"
  exit 1
fi

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

download_dependencies() {
  bash examples/models/llama3_2_vision/install_requirements.sh
  tune download meta-llama/Llama-3.2-11B-Vision-Instruct --output-dir /tmp/Llama-3.2-11B-Vision-Instruct
}

run_and_verify_eager() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to test llama3_2_vision text decoder at ${NOW}"
  if [[ ! -f "/tmp/Llama-3.2-11B-Vision-Instruct/original/consolidated.pth" ]]; then
    echo "checkpoint (consolidated.pth) is missing."
    exit 1
  fi
  if [[ ! -f "/tmp/Llama-3.2-11B-Vision-Instruct/original/tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi

  # Build the runner command as a string so the optional -kv flag can be
  # appended before execution.
  EAGER_RUNNER_ARGS="$PYTHON_EXECUTABLE -m examples.models.llama3_2_vision.runner.eager \
    -c /tmp/Llama-3.2-11B-Vision-Instruct/original/consolidated.pth \
    -t /tmp/Llama-3.2-11B-Vision-Instruct/original/tokenizer.model \
    -d fp32 \
    --max_seq_length 32 \
    --temperature 0 \
    --show_tokens \
    --prompt \"Once upon a time,\""

  if [[ "${ENABLE_KV_CACHE}" == "true" ]]; then
    EAGER_RUNNER_ARGS="${EAGER_RUNNER_ARGS} -kv"
  fi

  # Run the eager runner and capture its output for verification.
  eval "${EAGER_RUNNER_ARGS}" > result.txt

  # Verify result.txt against the expected token IDs.
  RESULT=$(cat result.txt)
  EXPECTED_RESULT="727, 471, 263, 2217, 7826, 4257, 365, 2354, 29889, 2296, 18012, 304, 1708, 5377, 297, 278, 6575, 845, 457, 29889, 3118, 2462, 29892, 1183, 4446, 263"
  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

download_dependencies
run_and_verify_eager
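
The result check relies on bash's [[ ]] pattern matching: the unquoted asterisks around ${EXPECTED_RESULT} turn == into a substring test, so the expected token IDs only have to appear somewhere in the runner's output rather than match it exactly. A minimal sketch of the same check, with made-up values:

# Substring match via [[ ]] globs (illustrative values only).
RESULT="header text 727, 471, 263 trailing log line"
EXPECTED_RESULT="727, 471, 263"
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
  echo "expected tokens found"
fi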

.github/workflows/pull.yml

Lines changed: 28 additions & 0 deletions
@@ -502,3 +502,31 @@ jobs:

      # run llama runner in eager mode
      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

  test-llama_3_2_vision_runner_eager-linux:
    name: test-llama_3_2_vision_runner_eager-linux
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        enable_kv_cache: ["true", "false"]
      fail-fast: false
    with:
      runner: linux.24xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

        # install pybind
        bash install_requirements.sh
        # install llama requirements
        bash examples/models/llama/install_requirements.sh

        ENABLE_KV_CACHE=${{ matrix.enable_kv_cache }}

        # run llama runner in eager mode
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh "${ENABLE_KV_CACHE}"
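
The enable_kv_cache matrix fans this job out into two runs, one per value, each passed through to the test script as its single positional argument. A rough local equivalent of the matrixed step, assuming the repo root as working directory, an environment with the requirements already installed, and the same script path the workflow invokes:

# Run the eager test for both matrix values locally (sketch only).
for ENABLE_KV_CACHE in true false; do
  PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh "${ENABLE_KV_CACHE}"
done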
