
Commit b3e52aa

Add Static Stories Llama CI

1 parent eacbeb7 commit b3e52aa

5 files changed: +76 additions, -6 deletions

.ci/scripts/setup-stories-llama.sh

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+# Download and prepare stories llama model artifacts
+prepare_model_artifacts() {
+  echo "Preparing stories model artifacts"
+  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+prepare_model_artifacts
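For anyone reproducing the CI locally, a minimal sanity check that the three artifacts this script writes are present and loadable might look like the Python below. The check_artifacts helper is hypothetical, not part of the commit; it only rechecks values the script itself writes.

import json
import os

import torch


def check_artifacts(workdir: str = ".") -> None:
    # Each file should exist (getsize raises if not) and be non-empty.
    for name in ("stories110M.pt", "tokenizer.model", "params.json"):
        path = os.path.join(workdir, name)
        assert os.path.getsize(path) > 0, f"empty download: {name}"

    # params.json must parse and match the config the script writes above.
    with open(os.path.join(workdir, "params.json")) as f:
        params = json.load(f)
    assert params["vocab_size"] == 32000 and params["n_layers"] == 12

    # The checkpoint is a regular torch pickle, so weights_only must stay off.
    checkpoint = torch.load(
        os.path.join(workdir, "stories110M.pt"),
        map_location="cpu",
        weights_only=False,
    )
    print(f"checkpoint keys: {list(checkpoint)[:5]}")


if __name__ == "__main__":
    check_artifacts()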

.github/workflows/pull.yml

Lines changed: 25 additions & 0 deletions
@@ -437,6 +437,31 @@ jobs:
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
+  test-static-llama-runner-qnn-linux:
+    name: test-static-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Retrieve 110M Stories Llama Artifacts
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-stories-llama.sh
+
+        # Test static llama stories110m
+        PYTHON_EXECUTABLE=python python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
+
   test-qnn-models-linux:
     name: test-qnn-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
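The script block above is plain bash run inside the reusable Linux job. For local debugging, a rough Python mirror of those steps could look like the sketch below; the paths and flags are copied from the workflow, but the wrapper itself is hypothetical and assumes an executorch checkout with the Android/QNN prerequisites installed.

import os
import subprocess

# Same steps as the workflow's script block, in order.
STEPS = [
    "bash .ci/scripts/setup-qnn-deps.sh",
    "bash .ci/scripts/build-qnn-sdk.sh",
    "bash .ci/scripts/setup-stories-llama.sh",
    "python backends/qualcomm/tests/test_qnn_delegate.py"
    " -k TestExampleScript.test_stories_single_llama"
    " --model SM8650 --build_folder build-android/"
    " --executorch_root . --artifact_dir . --compile_only",
]


def main() -> None:
    env = {**os.environ, "PYTHON_EXECUTABLE": "python"}
    for step in STEPS:
        # check=True fails fast, like `set -e` in the CI scripts.
        subprocess.run(step, shell=True, check=True, env=env)


if __name__ == "__main__":
    main()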

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 18 additions & 5 deletions
@@ -3764,8 +3764,6 @@ def test_stories_single_llama(self):
             self.artifact_dir,
             "--build_folder",
             self.build_folder,
-            "--device",
-            self.device,
             "--model",
             self.model,
             "--checkpoint",
@@ -3788,7 +3786,17 @@
             "0",
             "--llama_model",
             "stories110m",
-        ]
+            "--model_mode",
+            "hybrid",
+            "--prefill_seq_len",
+            "32",
+            "--kv_seq_len",
+            "128",
+        ]
+        if self.compile_only:
+            cmds.extend(["--compile_only"])
+        else:
+            cmds.extend(["--device", self.device])
         if self.host:
             cmds.extend(["--host", self.host])
@@ -3801,8 +3809,11 @@
         if "Error" in msg:
             self.fail(msg["Error"])
         else:
-            model_out = msg["result"][0]
-            self.assertTrue(model_out.startswith(golden_start_with))
+            if not self.compile_only:
+                model_out = msg["result"][0]
+                self.assertTrue(model_out.startswith(golden_start_with))
+            pte_size = msg["pte_size"]
+            self.assertLessEqual(pte_size, 130000000)
 
     @unittest.skip("dynamic shape inputs appear in recent torch.export.export")
     def test_mobilebert(self):
@@ -4031,6 +4042,8 @@ def setup_environment():
     TestQNN.shared_buffer = args.shared_buffer
     TestQNN.enable_x86_64 = args.enable_x86_64
     TestQNN.dump_intermediate_outputs = args.dump_intermediate_outputs
+    TestQNN.compile_only = args.compile_only
+
     return sys.argv[:1] + ns_args
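The msg dict the test inspects arrives over a multiprocessing.connection socket: llama.py JSON-encodes its results and sends them back to the host-side harness. A stripped-down sketch of that round trip follows; the address and payload values are illustrative, not the harness's real wiring.

import json
import threading
import time
from multiprocessing.connection import Client, Listener

ADDRESS = ("localhost", 6000)


def receive_one(out: dict) -> None:
    # Harness side: accept one connection and decode the payload as `msg`.
    with Listener(ADDRESS) as listener:
        with listener.accept() as conn:
            out.update(json.loads(conn.recv()))


def send_result() -> None:
    # Export-script side, mirroring conn.send(json.dumps(...)) in llama.py.
    with Client(ADDRESS) as conn:
        conn.send(json.dumps({"result": ["Once upon a time"], "pte_size": 120000000}))


if __name__ == "__main__":
    msg = {}
    receiver = threading.Thread(target=receive_one, args=(msg,))
    receiver.start()
    time.sleep(0.2)  # crude wait so the listener is bound before connecting
    send_result()
    receiver.join()
    assert msg["pte_size"] <= 130000000  # same budget the test asserts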

backends/qualcomm/tests/utils.py

Lines changed: 1 addition & 0 deletions
@@ -182,6 +182,7 @@ class TestQNN(unittest.TestCase):
     use_16a4w: str = "16a4w"
     shared_buffer: bool = False
     enable_x86_64: bool = False
+    compile_only: bool = False
 
     def _assert_outputs_equal(self, model_output, ref_output):
         self.assertTrue(len(ref_output) == len(model_output))
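compile_only follows the same pattern as the other TestQNN knobs: a class-level default that setup_environment() overwrites from argparse before handing control to unittest. A self-contained sketch of that pattern is below; the names are illustrative, not the real TestQNN.

import argparse
import sys
import unittest


class ExampleQNNTest(unittest.TestCase):
    compile_only: bool = False  # class-level default, as in TestQNN

    def test_reads_flag(self):
        # Every test method sees whatever setup_environment() assigned.
        self.assertIsInstance(self.compile_only, bool)


def setup_environment() -> list:
    parser = argparse.ArgumentParser()
    parser.add_argument("--compile_only", action="store_true")
    args, remaining = parser.parse_known_args()
    ExampleQNNTest.compile_only = args.compile_only
    # Hand unconsumed argv back to unittest, like `sys.argv[:1] + ns_args`.
    return sys.argv[:1] + remaining


if __name__ == "__main__":
    unittest.main(argv=setup_environment())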

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 14 additions & 1 deletion
@@ -843,13 +843,14 @@ def post_process():
             outputs.append(f.read())
 
         adb.pull(output_path=args.artifact, callback=post_process)
-
         if args.ip and args.port != -1:
+            pte_size = os.path.getsize(pte_path)
             with Client((args.ip, args.port)) as conn:
                 conn.send(
                     json.dumps(
                         {
                             "result": outputs,
+                            "pte_size": pte_size,
                         }
                     )
                 )
@@ -1062,6 +1063,18 @@ def main(args) -> None:
             )
         else:
             logging.warning("Quant attributes of the logit is None.")
+
+        if args.ip and args.port != -1:
+            pte_path = f"{args.artifact}/{pte_filename}.pte"
+            pte_size = os.path.getsize(pte_path)
+            with Client((args.ip, args.port)) as conn:
+                conn.send(
+                    json.dumps(
+                        {
+                            "pte_size": pte_size,
+                        }
+                    )
+                )
         exit(f"Finish compile_only and save to {args.artifact}")
 
     try:
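The pte_size field exists so the test can keep the exported program under a size budget (the 130000000-byte ceiling asserted above) even in compile-only runs that never touch a device. A standalone sketch of the same check, with a placeholder .pte path:

import os

PTE_BUDGET_BYTES = 130000000  # ceiling asserted by test_stories_single_llama


def check_pte_budget(pte_path: str) -> int:
    pte_size = os.path.getsize(pte_path)
    if pte_size > PTE_BUDGET_BYTES:
        raise AssertionError(
            f"{pte_path} is {pte_size} bytes, over the {PTE_BUDGET_BYTES} budget"
        )
    return pte_size


if __name__ == "__main__":
    # Placeholder artifact name; use whatever llama.py actually wrote.
    print(check_pte_budget("./stories110m_qnn.pte"))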
