Commit d462f4e
pytorchbot committed: 2024-10-19 nightly release (995dcaf)
1 parent 9b9fc93 · commit d462f4e

File tree: 15 files changed, +351 −16 lines

.ci/scripts/test_eval_llama_mmlu.sh

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --tasks mmlu \
    -f 5 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_TASK="mmlu"
  EXPECTED_RESULT="acc"
  if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
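For reference, the success check above is a bash glob match against the captured output; a rough Python equivalent of the same pattern (the example result line below is invented purely to exercise the pattern, not copied from a real run) looks like this:

import fnmatch

# Invented stand-in for the contents of result.txt; a real run would print the
# harness output for the mmlu task in roughly this "task: {metrics}" shape.
result = "mmlu: {'acc,none': 0.20, 'acc_stderr,none': 0.18}"

# Mirrors: [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]
assert fnmatch.fnmatch(result, "mmlu: {*acc*")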
.ci/scripts/test_eval_llama_wikitext.sh

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_TASK="wikitext"
  EXPECTED_RESULT="word_perplexity"
  if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
.ci/scripts/test_llama_runner_eager.sh

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 32 \
    --temperature 0 \
    --prompt "Once upon a time," > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_RESULT="there was a little girl"
  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify

.github/workflows/pull.yml

Lines changed: 81 additions & 0 deletions
@@ -447,3 +447,84 @@ jobs:

        # run e2e (export, tokenizer and runner)
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh
+
+  test-eval_llama-wikitext-linux:
+    name: test-eval_llama-wikitext-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run eval_llama wikitext task
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
+
+  test-eval_llama-mmlu-linux:
+    name: test-eval_llama-mmlu-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run eval_llama mmlu task
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+  test-llama_runner_eager-linux:
+    name: test-llama_runner_eager-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

.github/workflows/update-viablestrict.yml

Lines changed: 3 additions & 1 deletion
@@ -22,4 +22,6 @@ jobs:
          stable-branch: viable/strict
          requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Android$\", \"^Apple$\"]'
          secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
-          rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}
+          clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
+          clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}
+          clickhouse-password: ${{ secrets.CLICKHOUSE_VIABLESTRICT_PASSWORD }}

examples/models/llama/eval_llama_lib.py

Lines changed: 7 additions & 0 deletions
@@ -291,6 +291,13 @@ def eval_llama(
    # Generate the eval wrapper
    eval_wrapper = gen_eval_wrapper(model_name, args)

+    # Needed for loading mmlu dataset.
+    # See https://github.com/EleutherAI/lm-evaluation-harness/pull/1998/files
+    if args.tasks and "mmlu" in args.tasks:
+        import datasets
+
+        datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
+
    # Evaluate the model
    with torch.no_grad():
        eval_results = simple_evaluate(
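For context, a minimal standalone sketch of what this global toggle does, assuming only that the mmlu task resolves to a script-based Hugging Face dataset (the dataset id below is an assumption, not taken from this diff): setting the flag once is equivalent to passing trust_remote_code=True to every subsequent load_dataset call made by the evaluation harness.

import datasets

# Newer `datasets` releases refuse to run a dataset's bundled loading script
# unless remote code is explicitly trusted; the global flag opts in up front.
datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True

# Any later call, including the ones lm-evaluation-harness makes while building
# the mmlu task, now behaves as if trust_remote_code=True had been passed.
ds = datasets.load_dataset("hails/mmlu_no_train", "all")  # dataset id is an assumption
print(ds)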

examples/models/llama/evaluate/eager_eval.py

Lines changed: 6 additions & 1 deletion
@@ -40,7 +40,12 @@ def __init__(

    @property
    def eot_token_id(self):
-        return self._tokenizer.eot_id
+        """
+        The stories model does not have an EOT token, so we use the EOS token instead.
+        """
+        if hasattr(self._tokenizer, "eot_id"):
+            return self._tokenizer.eot_id
+        return self._tokenizer.eos_id

    @property
    def max_length(self):
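A small sketch of the fallback behaviour, using hypothetical stub tokenizers rather than the real Tiktoken/SentencePiece wrappers (the token ids are illustrative only):

class TiktokenLike:
    # A Llama-3-style tokenizer that distinguishes end-of-turn from end-of-sequence.
    eot_id = 128009
    eos_id = 128001

class SentencePieceLike:
    # A stories110M / Llama-2-style tokenizer with no EOT token at all.
    eos_id = 2

def eot_token_id(tokenizer):
    # Same logic as the property above: prefer eot_id, otherwise fall back to eos_id.
    if hasattr(tokenizer, "eot_id"):
        return tokenizer.eot_id
    return tokenizer.eos_id

assert eot_token_id(TiktokenLike()) == 128009
assert eot_token_id(SentencePieceLike()) == 2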

examples/models/llama/llama_transformer.py

Lines changed: 3 additions & 0 deletions
@@ -116,6 +116,9 @@ class ModelArgs:
    bos_count: int = -1  # i.e., a single EOS is used as BOS
    eos_count: int = 2

+    quantization_args: Optional[dict] = None
+    lora_args: Optional[dict] = None
+
    def __post_init__(self):
        if self.n_kv_heads is None:
            self.n_kv_heads = self.n_heads
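A hedged illustration of how these two optional fields might be populated from a checkpoint's params.json; the inner key names (group_size, rank) are inferred from the consistency checks added in model.py below, and all values are made up:

import json

# Hypothetical params.json for a pre-quantized, LoRA-adapted checkpoint.
params = json.loads("""
{
    "dim": 4096,
    "n_heads": 32,
    "n_layers": 32,
    "vocab_size": 128256,
    "quantization_args": {"group_size": 32},
    "lora_args": {"rank": 16}
}
""")

# With the new dataclass fields, ModelArgs(**params) can accept these two extra
# keys instead of failing on unexpected arguments.
print(params.get("quantization_args"))  # {'group_size': 32}
print(params.get("lora_args"))          # {'rank': 16}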

examples/models/llama/model.py

Lines changed: 5 additions & 3 deletions
@@ -165,7 +165,7 @@ def __init__(self, **kwargs):
            )
        elif hasattr(self.args, "use_spin_quant") and self.args.use_spin_quant:
            print("Using SPIN quantization.")
-            self._transform_for_pre_quantization(checkpoint)
+            self._transform_for_pre_quantization(checkpoint, model_args)

            from .source_transformation.pre_quantization import (
                sanitize_checkpoint_from_pre_quantization,
@@ -174,8 +174,9 @@ def __init__(self, **kwargs):
            sanitize_checkpoint_from_pre_quantization(checkpoint)
        elif hasattr(self.args, "use_qat") and self.args.use_qat:
            print("Using QAT quantization.")
-            self._transform_for_pre_quantization(checkpoint)
+            self._transform_for_pre_quantization(checkpoint, model_args)
            if hasattr(self.args, "use_lora") and self.args.use_lora:
+                assert model_args.lora_args["rank"] == self.args.use_lora
                from .source_transformation.lora import (
                    transform_linear_for_lora_after_quantization,
                )
@@ -251,7 +252,7 @@ def get_example_inputs_kvcache_sdpa(self):
            ),  # start_pos, what token of output are we on.
        )

-    def _transform_for_pre_quantization(self, checkpoint):
+    def _transform_for_pre_quantization(self, checkpoint, model_args):
        assert hasattr(self.args, "preq_mode"), "preq_mode must be specified"
        assert self.args.preq_mode in [
            "8da4w",
@@ -264,6 +265,7 @@ def _transform_for_pre_quantization(self):
        from .source_transformation.pre_quantization import (
            transform_linear_for_pre_quantization,
        )
+        assert self.args.preq_group_size == model_args.quantization_args["group_size"]

        mapping = {
            "fp32": torch.float32,

examples/models/llama/runner/eager.py

Lines changed: 2 additions & 2 deletions
@@ -11,11 +11,11 @@
import torch

from examples.models.llama.llama_transformer import ModelArgs
-from executorch.examples.models.llama2.export_llama_lib import (
+from executorch.examples.models.llama.export_llama_lib import (
    _prepare_for_llama_export,
    build_args_parser as _build_args_parser,
)
-from executorch.examples.models.llama2.runner.generation import LlamaRunner
+from executorch.examples.models.llama.runner.generation import LlamaRunner
from executorch.extension.llm.export import LLMEdgeManager
