Skip to content

Commit 2c67a95

Browse files
committed
Update on "Add update_quantized_cache op"
Why? - A ton of copies are introduced by functionalization - Mutable buffer support without such custom in-place ops will result in giant copies at the end - Making in-place ops work will likely take longer, and there is no clear safe path Differential Revision: [D62301838](https://our.internmc.facebook.com/intern/diff/D62301838/) [ghstack-poisoned]
2 parents 6fec56f + d609e40 commit 2c67a95

File tree

123 files changed

+3724
-791
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+3724
-791
lines changed

.ci/scripts/test_phi_3_mini.sh

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
BUILD_TYPE=${1:-Debug}
11+
BUILD_DIR=${3:-cmake-out}
12+
MODEL_DIR=examples/models/phi-3-mini
13+
14+
echo "Building with BUILD_TYPE: $BUILD_TYPE, BUILD_DIR: $BUILD_DIR"
15+
16+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
17+
PYTHON_EXECUTABLE=python3
18+
fi
19+
20+
# Number of processes for a parallel build
21+
NPROC=8
22+
if hash nproc &> /dev/null; then NPROC=$(nproc); fi
23+
24+
cmake_install_executorch_libraries() {
25+
cmake -DPYTHON_EXECUTABLE=python \
26+
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
27+
-DEXECUTORCH_ENABLE_LOGGING=1 \
28+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
29+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
30+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
31+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
32+
-DEXECUTORCH_BUILD_XNNPACK=ON \
33+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
34+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
35+
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
36+
-B${BUILD_DIR} .
37+
38+
cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
39+
}
40+
41+
cmake_build_phi_3_mini() {
42+
cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
43+
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
44+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
45+
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
46+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
47+
-DEXECUTORCH_BUILD_XNNPACK=ON \
48+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
49+
-B${BUILD_DIR}/${MODEL_DIR} \
50+
${MODEL_DIR}
51+
52+
cmake --build ${BUILD_DIR}/${MODEL_DIR} -j${NPROC} --config ${BUILD_TYPE}
53+
}
54+
55+
# Download and convert tokenizer.model
56+
prepare_tokenizer() {
57+
echo "Downloading and converting tokenizer.model"
58+
wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
59+
$PYTHON_EXECUTABLE -m executorch.extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
60+
}
61+
62+
# Export phi-3-mini model to pte
63+
export_phi_3_mini () {
64+
echo "Exporting phi-3-mini. This will take a few minutes"
65+
$PYTHON_EXECUTABLE -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte
66+
}
67+
68+
run_and_verify() {
69+
NOW=$(date +"%H:%M:%S")
70+
echo "Starting to run phi-3-mini runner at ${NOW}"
71+
if [[ ! -f "phi-3-mini.pte" ]]; then
72+
echo "Export failed. Abort"
73+
exit 1
74+
fi
75+
if [[ ! -f "tokenizer.bin" ]]; then
76+
echo "tokenizer.bin is missing."
77+
exit 1
78+
fi
79+
80+
${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
81+
--model_path=phi-3-mini.pte \
82+
--tokenizer_path=tokenizer.bin \
83+
--seq_len=128 \
84+
--temperature=0 \
85+
--prompt="<|system|>
86+
You are a helpful assistant.<|end|>
87+
<|user|>
88+
What is the capital of France?<|end|>
89+
<|assistant|>" > result.txt
90+
91+
# verify result.txt
92+
RESULT=$(cat result.txt)
93+
EXPECTED_RESULT="The capital of France is Paris."
94+
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
95+
echo "Expected result prefix: ${EXPECTED_RESULT}"
96+
echo "Actual result: ${RESULT}"
97+
echo "Success"
98+
exit 0
99+
else
100+
echo "Expected result prefix: ${EXPECTED_RESULT}"
101+
echo "Actual result: ${RESULT}"
102+
echo "Failure; results not the same"
103+
exit 1
104+
fi
105+
}
106+
107+
# Step 1. Build ExecuTorch and phi-3-mini runner
108+
cmake_install_executorch_libraries
109+
cmake_build_phi_3_mini
110+
111+
# Step 2. Export the tokenizer and model
112+
prepare_tokenizer
113+
export_phi_3_mini
114+
115+
# Step 3. Run and verify result
116+
run_and_verify

.github/workflows/pull.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,3 +414,30 @@ jobs:
414414
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
415415
# Test llama2
416416
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
417+
418+
test-phi-3-mini-runner-linux:
419+
name: test-phi-3-mini-runner-linux
420+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
421+
strategy:
422+
fail-fast: false
423+
with:
424+
runner: linux.24xlarge
425+
docker-image: executorch-ubuntu-22.04-clang12
426+
submodules: 'true'
427+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
428+
timeout: 90
429+
script: |
430+
# The generic Linux job chooses to use the base env, not the one set up by the image
431+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
432+
conda activate "${CONDA_ENV}"
433+
434+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
435+
436+
# install pybind
437+
bash install_requirements.sh --pybind xnnpack
438+
439+
# install phi-3-mini requirements
440+
bash examples/models/phi-3-mini/install_requirements.sh
441+
442+
# run e2e (export, tokenizer and runner)
443+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh

CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -680,11 +680,16 @@ if(EXECUTORCH_BUILD_PYBIND)
680680
etdump
681681
executorch
682682
extension_data_loader
683-
portable_ops_lib
684683
util
685684
torch
686685
)
687686

687+
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
688+
list(APPEND _dep_libs optimized_native_cpu_ops_lib)
689+
else()
690+
list(APPEND _dep_libs portable_ops_lib)
691+
endif()
692+
688693
if(EXECUTORCH_BUILD_COREML)
689694
list(APPEND _dep_libs coremldelegate)
690695
endif()

backends/arm/arm_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ def __init__(self):
5252
def ethosu_compile_spec(
5353
self,
5454
config: str,
55-
system_config: Optional[str] = None,
56-
memory_mode: Optional[str] = None,
55+
system_config: str,
56+
memory_mode: str,
5757
extra_flags: Optional[str] = None,
5858
config_ini: Optional[str] = "Arm/vela.ini",
5959
) -> "ArmCompileSpecBuilder":

backends/arm/test/common.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,29 @@ def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
8686
return False
8787

8888

89+
def maybe_get_tosa_collate_path() -> str | None:
90+
"""
91+
Checks the environment variable TOSA_TESTCASES_BASE_PATH and returns the
92+
path under which to store the current test's output, if it is set.
93+
"""
94+
tosa_test_base = os.environ.get("TOSA_TESTCASES_BASE_PATH")
95+
if tosa_test_base:
96+
current_test = os.environ.get("PYTEST_CURRENT_TEST")
97+
#'backends/arm/test/ops/test_mean_dim.py::TestMeanDim::test_meandim_tosa_BI_0_zeros (call)'
98+
test_class = current_test.split("::")[1]
99+
test_name = current_test.split("::")[-1].split(" ")[0]
100+
if "BI" in test_name:
101+
tosa_test_base = os.path.join(tosa_test_base, "tosa-bi")
102+
elif "MI" in test_name:
103+
tosa_test_base = os.path.join(tosa_test_base, "tosa-mi")
104+
else:
105+
tosa_test_base = os.path.join(tosa_test_base, "other")
106+
107+
return os.path.join(tosa_test_base, test_class, test_name)
108+
109+
return None
110+
111+
89112
def get_tosa_compile_spec(
90113
permute_memory_to_nhwc=True, custom_path=None
91114
) -> list[CompileSpec]:
@@ -101,7 +124,13 @@ def get_tosa_compile_spec_unbuilt(
101124
"""Get the ArmCompileSpecBuilder for the default TOSA tests, to modify
102125
the compile spec before calling .build() to finalize it.
103126
"""
104-
intermediate_path = custom_path or tempfile.mkdtemp(prefix="arm_tosa_")
127+
if not custom_path:
128+
intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
129+
prefix="arm_tosa_"
130+
)
131+
else:
132+
intermediate_path = custom_path
133+
105134
if not os.path.exists(intermediate_path):
106135
os.makedirs(intermediate_path, exist_ok=True)
107136
compile_spec_builder = (

backends/arm/test/misc/test_debug_feats.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import logging
88
import os
9+
import shutil
910
import tempfile
1011
import unittest
1112

@@ -149,3 +150,39 @@ def test_dump_ops_and_dtypes(self):
149150
.dump_operator_distribution()
150151
)
151152
# Just test that there are no exceptions.
153+
154+
155+
class TestCollateTosaTests(unittest.TestCase):
156+
"""Tests the collation of TOSA tests through setting the environment variable TOSA_TESTCASES_BASE_PATH."""
157+
158+
def test_collate_tosa_BI_tests(self):
159+
# Set the environment variable to trigger the collation of TOSA tests
160+
os.environ["TOSA_TESTCASES_BASE_PATH"] = "test_collate_tosa_tests"
161+
# Clear out the directory
162+
163+
model = Linear(20, 30)
164+
(
165+
ArmTester(
166+
model,
167+
example_inputs=model.get_inputs(),
168+
compile_spec=common.get_tosa_compile_spec(),
169+
)
170+
.quantize()
171+
.export()
172+
.to_edge()
173+
.partition()
174+
.to_executorch()
175+
)
176+
# test that the output directory is created and contains the expected files
177+
assert os.path.exists(
178+
"test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests"
179+
)
180+
assert os.path.exists(
181+
"test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/output_tag8.tosa"
182+
)
183+
assert os.path.exists(
184+
"test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/desc_tag8.json"
185+
)
186+
187+
os.environ.pop("TOSA_TESTCASES_BASE_PATH")
188+
shutil.rmtree("test_collate_tosa_tests", ignore_errors=True)

backends/arm/test/models/test_mobilenet_v2_arm.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,18 @@ def test_mv2_u55_BI(self):
102102
tester.run_method_and_compare_outputs(
103103
atol=1.0, qtol=1, inputs=self.model_inputs
104104
)
105+
106+
def test_mv2_u85_BI(self):
107+
(
108+
ArmTester(
109+
self.mv2,
110+
example_inputs=self.model_inputs,
111+
compile_spec=common.get_u85_compile_spec(permute_memory_to_nhwc=True),
112+
)
113+
.quantize()
114+
.export()
115+
.to_edge(config=self._edge_compile_config)
116+
.check(list(self.operators_after_quantization))
117+
.partition()
118+
.to_executorch()
119+
)

backends/arm/test/ops/test_add.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from executorch.backends.arm.test import common
1414
from executorch.backends.arm.test.tester.arm_tester import ArmTester
1515
from executorch.exir import EdgeCompileConfig
16+
from executorch.exir.backend.compile_spec_schema import CompileSpec
1617
from parameterized import parameterized
1718

1819

@@ -92,16 +93,17 @@ def _test_add_tosa_BI_pipeline(
9293
.run_method_and_compare_outputs(inputs=test_data, qtol=1)
9394
)
9495

95-
def _test_add_u55_BI_pipeline(
96+
def _test_add_ethos_BI_pipeline(
9697
self,
9798
module: torch.nn.Module,
99+
compile_spec: CompileSpec,
98100
test_data: Tuple[torch.Tensor],
99101
):
100102
tester = (
101103
ArmTester(
102104
module,
103105
example_inputs=test_data,
104-
compile_spec=common.get_u55_compile_spec(permute_memory_to_nhwc=True),
106+
compile_spec=compile_spec,
105107
)
106108
.quantize()
107109
.export()
@@ -114,8 +116,7 @@ def _test_add_u55_BI_pipeline(
114116
.serialize()
115117
)
116118

117-
if common.is_option_enabled("corstone300"):
118-
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
119+
return tester
119120

120121
@parameterized.expand(Add.test_parameters)
121122
def test_add_tosa_MI(self, test_data: torch.Tensor):
@@ -130,7 +131,22 @@ def test_add_tosa_BI(self, test_data: torch.Tensor):
130131
@parameterized.expand(Add.test_parameters)
131132
def test_add_u55_BI(self, test_data: torch.Tensor):
132133
test_data = (test_data,)
133-
self._test_add_u55_BI_pipeline(self.Add(), test_data)
134+
tester = self._test_add_ethos_BI_pipeline(
135+
self.Add(),
136+
common.get_u55_compile_spec(permute_memory_to_nhwc=True),
137+
test_data,
138+
)
139+
if common.is_option_enabled("corstone300"):
140+
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
141+
142+
@parameterized.expand(Add.test_parameters)
143+
def test_add_u85_BI(self, test_data: torch.Tensor):
144+
test_data = (test_data,)
145+
self._test_add_ethos_BI_pipeline(
146+
self.Add(),
147+
common.get_u85_compile_spec(permute_memory_to_nhwc=True),
148+
test_data,
149+
)
134150

135151
@parameterized.expand(Add2.test_parameters)
136152
def test_add2_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor):
@@ -145,4 +161,15 @@ def test_add2_tosa_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
145161
@parameterized.expand(Add2.test_parameters)
146162
def test_add2_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
147163
test_data = (operand1, operand2)
148-
self._test_add_u55_BI_pipeline(self.Add2(), test_data)
164+
tester = self._test_add_ethos_BI_pipeline(
165+
self.Add2(), common.get_u55_compile_spec(), test_data
166+
)
167+
if common.is_option_enabled("corstone300"):
168+
tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
169+
170+
@parameterized.expand(Add2.test_parameters)
171+
def test_add2_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor):
172+
test_data = (operand1, operand2)
173+
self._test_add_ethos_BI_pipeline(
174+
self.Add2(), common.get_u85_compile_spec(), test_data
175+
)

0 commit comments

Comments
 (0)