@@ -440,9 +440,8 @@ jobs:
440440 # Test llama2
441441 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
442442
443- # Compile only as weight sharing is not applicable on x86
444- test-static-llama-size-qnn-linux :
445- name : test-static-llama-size-qnn-linux
443+ test-static-llama-qnn-linux :
444+ name : test-static-llama-qnn-linux
446445 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
447446 permissions :
448447 id-token : write
@@ -470,49 +469,9 @@ jobs:
470469
471470 # Setup install_requirements for llama
472471 PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
473-
474- # Retrieve 110M Stories Llama Artifacts
475- PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
476- PYTHON_EXECUTABLE=python download_stories_model_artifacts
477- PYTHONPATH="${PWD}" python -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
478-
479- # Test static llama stories110m pte size
480- PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
481-
482- # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
483- test-static-llama-accuracy-qnn-linux :
484- name : test-static-llama-accuracy-qnn-linux
485- uses : pytorch/test-infra/.github/workflows/linux_job.yml@main
486- strategy :
487- fail-fast : false
488- with :
489- runner : linux.2xlarge
490- docker-image : executorch-ubuntu-22.04-qnn-sdk
491- submodules : 'true'
492- ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
493- timeout : 900
494- script : |
495- # The generic Linux job chooses to use base env, not the one setup by the image
496- CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
497- conda activate "${CONDA_ENV}"
498-
499- BUILD_TOOL="cmake"
500-
501- PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
502- PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
503-
504- # Setup executorch
505- PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
506-
507- # Setup install_requirements for llama
508- PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
509-
510- # Retrieve 110M Stories Llama Artifacts
511- PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
512- PYTHON_EXECUTABLE=python download_stories_model_artifacts
513472
514- # Test static llama stories110m accuracy
515- PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86_64/ --executorch_root . --artifact_dir . --enable_x86_64
473+ # Test static llama weight sharing and accuracy
474+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
516475
517476 test-qnn-models-linux :
518477 name : test-qnn-models-linux
0 commit comments