
Commit 85e1277

Update on "Support export program in intermediate numeric discrepancy detector"
This diff enables the intermediate numeric discrepancy detector to use an exported program as the label. More specifically, if the user creates an ETRecord with an exported program, and that exported program is one of the exported programs in the export flow, the numeric discrepancy detector will use it as the label. Otherwise, we continue to use the edge dialect graph as the label. Differential Revision: [D78298935](https://our.internmc.facebook.com/intern/diff/D78298935/) [ghstack-poisoned]
2 parents df6781c + d2c0f58
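As a rough sketch of the selection logic described above: all names below (`select_label`, `etrecord`, `export_flow_programs`, `edge_dialect_graph`) are illustrative placeholders, not the detector's actual API.

```python
# Hypothetical sketch; none of these names are the real detector API.
def select_label(etrecord, export_flow_programs):
    """Prefer the user's exported program as the label when it matches a
    program produced during the export flow; otherwise fall back to the
    edge dialect graph."""
    exported = getattr(etrecord, "exported_program", None)
    if exported is not None and any(exported is p for p in export_flow_programs):
        return exported  # the exported program becomes the comparison label
    return etrecord.edge_dialect_graph  # previous default behavior
```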

File tree

36 files changed: +1300 −817 lines

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2dccff7dcf56b0d168ebfd7ca08bdeca37273c56
+ab43fe4bdf5ccd82897f0e982c451a0127bd175e

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -269,7 +269,7 @@ jobs:
         if [[ ${{ matrix.os}} == "bare_metal" ]]; then
           bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
         elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
-          CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
+          CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
           cmake --build cmake-out -j9 --target install --config Release
           CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
           cmake --build cmake-out/test -j9 --config Release

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 1 deletion
@@ -103,8 +103,8 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.repeat.default,
-        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.relu.default,
+        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.split_with_sizes_copy.default,

backends/qualcomm/quantizer/annotators.py

Lines changed: 4 additions & 2 deletions
@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
     )


-@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
+@register_annotator(
+    [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
+)
 def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_binary(node, quantization_config)

@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
     )


-@register_annotator([torch.ops.aten.zeros.default])
+@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
 def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
     if _is_annotated([node]) or not _is_float_tensor(node):
         return
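For context on the broadened registration: `torch.ops.aten.mul_.Tensor` is the in-place variant of multiply, so captured graphs containing in-place multiplies can now reach `annotate_mul` as well. A hedged illustration (the module below is invented for the example):

```python
import torch


class Scale(torch.nn.Module):
    """Made-up module for illustration only."""

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        out = x.clone()  # avoid mutating the graph input itself
        # An in-place multiply; in a pre-autograd capture this can appear
        # as an aten.mul_.Tensor node, which the annotator now covers.
        out.mul_(y)
        return out
```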

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 7 additions & 4 deletions
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
     )


-def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None:  # noqa: C901
+def annotate_matmul_16a8w(  # noqa: C901
+    gm: torch.fx.GraphModule, annotate_conv=True
+) -> None:
     """
     This function is specific for matmul op 16a8w.
     For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
             # The arguments of cat op: (the past kv cache, the new kv cache)
             node = node.args[0][1]
         elif node.target == torch.ops.aten.conv2d.default:
-            annotate_conv2d(
-                node, quantization_config=quantization_config_8a4w_per_channel
-            )
+            if annotate_conv:
+                annotate_conv2d(
+                    node, quantization_config=quantization_config_8a4w_per_channel
+                )
             break
         elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
             break
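A minimal usage sketch of the new `annotate_conv` flag; the graph module `gm` is assumed to come from an earlier capture step, as in the example llama scripts:

```python
import torch
from executorch.backends.qualcomm.quantizer.custom_annotation import (
    annotate_matmul_16a8w,
)

gm: torch.fx.GraphModule = ...  # assumed: captured earlier in the flow

# The default keeps the old behavior (conv2d annotated 8a4w per-channel);
# annotate_conv=False now skips the conv2d annotation entirely.
annotate_matmul_16a8w(gm, annotate_conv=False)
```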

backends/qualcomm/scripts/build.sh

Lines changed: 8 additions & 0 deletions
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
         -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DANDROID_ABI='arm64-v8a' \
         -DANDROID_PLATFORM=android-30 \
         -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
+        -DSUPPORT_REGEX_LOOKAHEAD=ON \
+        -DBUILD_TESTING=OFF \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
         -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
         -S $PRJ_ROOT \
         -B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
         -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
         -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
+        -DSUPPORT_REGEX_LOOKAHEAD=ON \
+        -DBUILD_TESTING=OFF \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -B$EXAMPLE_ROOT

     cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 61 additions & 2 deletions
@@ -4049,7 +4049,7 @@ def test_llama3_2_1b(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "llama3_2",
             "--model_mode",
             "hybrid",
@@ -4129,7 +4129,7 @@ def test_llama_stories_110m(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "stories110m",
             "--model_mode",
             "hybrid",
@@ -4171,6 +4171,65 @@ def test_llama_stories_110m(self):
         if not self.compile_only and not self.enable_x86_64:
             self.assertGreaterEqual(msg["inference_speed"], 220)  # Lanai

+    def test_qwen2_5(self):
+        if not self.required_envs():
+            self.skipTest("missing required envs")
+
+        prompt = "My favourite condiment is "
+        cmds = [
+            "python",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
+            self.build_folder,
+            "--model",
+            self.model,
+            "--ip",
+            self.ip,
+            "--port",
+            str(self.port),
+            "--prompt",
+            f"{prompt}",
+            "--ptq",
+            "16a8w",
+            "--decoder_model",
+            "qwen2_5",
+            "--model_mode",
+            "hybrid",
+            "--prefill_ar_len",
+            "32",
+            "--max_seq_len",
+            "128",
+        ]
+        if self.compile_only:
+            cmds.extend(["--compile_only"])
+        elif self.device:
+            cmds.extend(["--device", self.device])
+            if self.host:
+                cmds.extend(["--host", self.host])
+        elif self.enable_x86_64:
+            cmds.extend(["--enable_x86_64"])
+        if self.pre_gen_pte:
+            cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
+
+        # Accuracy is bad for now. Just check user's prompt is returned.
+        golden_start_with = "My favourite condiment is "
+        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
+        with Listener((self.ip, self.port)) as listener:
+            conn = listener.accept()
+            p.communicate()
+            msg = json.loads(conn.recv())
+            if "Error" in msg:
+                self.fail(msg["Error"])
+            else:
+                model_out = msg["result"][0]
+                self.assertTrue(
+                    model_out.startswith(golden_start_with),
+                    f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
+                )
+                self.assertGreaterEqual(msg["inference_speed"], 95)  # Lanai
+

 class TestExampleOssScript(TestQNN):
     def test_albert(self):

backends/test/suite/flow.py

Lines changed: 9 additions & 0 deletions
@@ -62,4 +62,13 @@ def all_flows() -> dict[str, TestFlow]:
     except Exception as e:
         logger.info(f"Skipping Core ML flow registration: {e}")

+    try:
+        from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW
+
+        flows += [
+            VULKAN_TEST_FLOW,
+        ]
+    except Exception as e:
+        logger.info(f"Skipping Vulkan flow registration: {e}")
+
     return {f.name: f for f in flows if f is not None}
backends/test/suite/flows/vulkan.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+from executorch.backends.test.suite.flow import TestFlow
+from executorch.backends.vulkan.test.tester import VulkanTester
+
+
+def _create_vulkan_flow(
+    name: str,
+    quantize: bool = False,
+) -> TestFlow:
+    return TestFlow(
+        name,
+        backend="vulkan",
+        tester_factory=VulkanTester,
+        quantize=quantize,
+    )
+
+
+VULKAN_TEST_FLOW = _create_vulkan_flow("vulkan")
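Since all_flows() keys the registry by TestFlow.name, the new flow is addressable as "vulkan" once its import succeeds. A small usage sketch:

```python
from executorch.backends.test.suite.flow import all_flows

flows = all_flows()
# The Vulkan flow is registered under its TestFlow name, "vulkan";
# the key is absent if registration was skipped (e.g. import failure).
vulkan_flow = flows.get("vulkan")
```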

backends/vulkan/test/TARGETS

Lines changed: 10 additions & 0 deletions
@@ -1,4 +1,5 @@
 load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

 oncall("executorch")

@@ -57,3 +58,12 @@ python_unittest(
         "//executorch/backends/vulkan:vulkan_preprocess",
     ],
 )
+
+runtime.python_library(
+    name = "tester",
+    srcs = ["tester.py"],
+    deps = [
+        "//executorch/backends/vulkan/partitioner:vulkan_partitioner",
+        "//executorch/backends/vulkan:vulkan_preprocess",
+    ]
+)
