@@ -442,8 +442,11 @@ jobs:
 
   # Compile only as weight sharing is not applicable on x86
   test-static-llama-size-qnn-linux:
-    name: test-static-llama-runner-qnn-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    name: test-static-llama-size-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     strategy:
       fail-fast: false
     with:
@@ -457,22 +460,28 @@ jobs:
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
 
+        BUILD_TOOL="cmake"
+
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
         # Setup executorch
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
 
+        # Setup install_requirements for llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
         # Retrieve 110M Stories Llama Artifacts
+        PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
         PYTHON_EXECUTABLE=python download_stories_model_artifacts
-        $PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+        PYTHONPATH="${PWD}" python -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
 
         # Test static llama stories110m pte size
-        PYTHON_EXECUTABLE= python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only"
+        PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
 
   # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
   test-static-llama-accuracy-qnn-linux:
-    name: test-static-llama-runner-qnn-linux
+    name: test-static-llama-accuracy-qnn-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       fail-fast: false
@@ -487,18 +496,23 @@ jobs:
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
 
+        BUILD_TOOL="cmake"
+
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
         # Setup executorch
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
 
+        # Setup install_requirements for llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
         # Retrieve 110M Stories Llama Artifacts
+        PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
         PYTHON_EXECUTABLE=python download_stories_model_artifacts
-        $PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
 
         # Test static llama stories110m accuracy
-        PYTHON_EXECUTABLE= python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86_64/ --executorch_root . --artifact_dir . --enable_x86_64"
+        PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86_64/ --executorch_root . --artifact_dir . --enable_x86_64
 
   test-qnn-models-linux:
     name: test-qnn-models-linux
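
Note on the recurring fix above: the removed lines ran the tests as PYTHON_EXECUTABLE= python ... (setting the variable to an empty string) and carried a stray trailing quote, so the step could not run as written; the added lines instead put the repo root on PYTHONPATH so the in-tree modules (extension.llm.tokenizer.tokenizer, backends/qualcomm/tests/test_qnn_delegate.py) resolve. A minimal shell sketch of the pattern the new script lines rely on, assuming (as the sourcing step suggests) that download_stories_model_artifacts is a function defined in .ci/scripts/utils.sh:

    # Sketch only, not the exact CI script.
    # Source the helper library so its functions become available in this shell;
    # download_stories_model_artifacts is assumed to fetch the 110M Stories
    # checkpoint and tokenizer.model into the working directory.
    . .ci/scripts/utils.sh
    download_stories_model_artifacts
    # Run the in-tree tokenizer module with the repo root on PYTHONPATH.
    PYTHONPATH="${PWD}" python -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin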