
Commit dfcabec

Merge branch 'main' into fix-unsigned-comparison
2 parents: c6fe189 + bd530e8 · commit dfcabec

24 files changed: +557 −232 lines

.github/workflows/pull.yml

Lines changed: 63 additions & 0 deletions
@@ -762,3 +762,66 @@ jobs:
 
         # Test selective build
         PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
+
+  unittest-nxp-neutron:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Build and install Executorch
+        PYTHON_EXECUTABLE=python \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
+        .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install test requirements
+        pip install -r backends/nxp/requirements-tests.txt
+
+        # Run pytest
+        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
+
+        # Run aot example:
+        PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh
+
+
+  nxp-build-test:
+    name: nxp-build-test
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Build
+        cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
+        cmake --build cmake-out --target executorch_delegate_neutron --config Release
+
+        # Build check for the neutron backend library
+        lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
+        if [ -f $lib_neutron ]; then
+          echo "Neutron backend library built."
+        else
+          echo "Neutron backend library not found!"
+          exit 1
+        fi

.github/workflows/trunk.yml

Lines changed: 0 additions & 62 deletions
@@ -302,36 +302,6 @@ jobs:
           exit 1
         fi
 
-  nxp-build-test:
-    name: nxp-build-test
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Build
-        cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
-        cmake --build cmake-out --target executorch_delegate_neutron --config Release
-
-        # Build check for the neutron backend library
-        lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
-        if [ -f $lib_neutron ]; then
-          echo "Neutron backend library built."
-        else
-          echo "Neutron backend library not found!"
-          exit 1
-        fi
-
   test-coreml-delegate:
     name: test-coreml-delegate
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -771,35 +741,3 @@ jobs:
       build-mode: Release
       build-tool: cmake
       docker-image: executorch-ubuntu-22.04-clang12
-
-  unittest-nxp-neutron:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        set -eux
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Build and install Executorch
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
-        .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # Install test requirements
-        pip install -r backends/nxp/requirements-tests.txt
-
-        # Run pytest
-        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
-
-        # Run aot example:
-        PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 1 deletion
@@ -101,8 +101,8 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.repeat.default,
-        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.relu.default,
+        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.split_with_sizes_copy.default,

backends/qualcomm/quantizer/annotators.py

Lines changed: 4 additions & 2 deletions
@@ -275,7 +275,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
     )
 
 
-@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
+@register_annotator(
+    [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
+)
 def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_binary(node, quantization_config)
 
@@ -1298,7 +1300,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
     )
 
 
-@register_annotator([torch.ops.aten.zeros.default])
+@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
 def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
     if _is_annotated([node]) or not _is_float_tensor(node):
         return
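
The two widened registrations route the in-place variant torch.ops.aten.mul_.Tensor to annotate_mul and torch.ops.aten.zeros_like.default to annotate_zeros, so those aliases no longer fall through unannotated. For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of an op-keyed annotator registry; it is illustrative only, not the ExecuTorch implementation, and uses plain strings as op keys:

# Minimal sketch of the register_annotator pattern (illustrative only).
from typing import Callable, Dict, List

_ANNOTATORS: Dict[str, Callable] = {}

def register_annotator(ops: List[str]):
    def decorator(fn: Callable) -> Callable:
        for op in ops:
            # Every listed alias resolves to the same annotator function.
            _ANNOTATORS[op] = fn
        return fn
    return decorator

@register_annotator(["aten.mul", "aten.mul.Tensor", "aten.mul_.Tensor"])
def annotate_mul(node, quantization_config):
    pass  # the real annotator calls annotate_binary(node, quantization_config)

assert _ANNOTATORS["aten.mul_.Tensor"] is annotate_mul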

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 7 additions & 4 deletions
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
     )
 
 
-def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None:  # noqa: C901
+def annotate_matmul_16a8w(  # noqa: C901
+    gm: torch.fx.GraphModule, annotate_conv=True
+) -> None:
     """
     This function is specific for matmul op 16a8w.
     For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
                 # The arguments of cat op: (the past kv cache, the new kv cache)
                 node = node.args[0][1]
             elif node.target == torch.ops.aten.conv2d.default:
-                annotate_conv2d(
-                    node, quantization_config=quantization_config_8a4w_per_channel
-                )
+                if annotate_conv:
+                    annotate_conv2d(
+                        node, quantization_config=quantization_config_8a4w_per_channel
+                    )
                 break
             elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
                 break
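
The new annotate_conv keyword defaults to True, so existing callers keep the 8a4w per-channel conv2d annotation; passing False skips it while the matmul annotation proceeds unchanged. A hedged usage sketch (the import path is inferred from the file location, and gm stands for an exported torch.fx.GraphModule supplied by the caller's pipeline):

# Usage sketch; import path assumed from backends/qualcomm/quantizer/custom_annotation.py.
from executorch.backends.qualcomm.quantizer.custom_annotation import (
    annotate_matmul_16a8w,
)

annotate_matmul_16a8w(gm)                       # default: conv2d nodes get the 8a4w per-channel config
annotate_matmul_16a8w(gm, annotate_conv=False)  # leave conv2d nodes for another annotator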

backends/qualcomm/scripts/build.sh

Lines changed: 8 additions & 0 deletions
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
     -DANDROID_ABI='arm64-v8a' \
     -DANDROID_PLATFORM=android-30 \
     -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
+    -DSUPPORT_REGEX_LOOKAHEAD=ON \
+    -DBUILD_TESTING=OFF \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -S $PRJ_ROOT \
     -B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
     -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
     -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
+    -DSUPPORT_REGEX_LOOKAHEAD=ON \
+    -DBUILD_TESTING=OFF \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -B$EXAMPLE_ROOT
 
   cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 61 additions & 2 deletions
@@ -3999,7 +3999,7 @@ def test_llama3_2_1b(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "llama3_2",
             "--model_mode",
             "hybrid",
@@ -4079,7 +4079,7 @@ def test_llama_stories_110m(self):
             "16a4w",
             "--temperature",
             "0",
-            "--llama_model",
+            "--decoder_model",
             "stories110m",
             "--model_mode",
             "hybrid",
@@ -4121,6 +4121,65 @@ def test_llama_stories_110m(self):
         if not self.compile_only and not self.enable_x86_64:
             self.assertGreaterEqual(msg["inference_speed"], 220)  # Lanai
 
+    def test_qwen2_5(self):
+        if not self.required_envs():
+            self.skipTest("missing required envs")
+
+        prompt = "My favourite condiment is "
+        cmds = [
+            "python",
+            f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
+            "--artifact",
+            self.artifact_dir,
+            "--build_folder",
+            self.build_folder,
+            "--model",
+            self.model,
+            "--ip",
+            self.ip,
+            "--port",
+            str(self.port),
+            "--prompt",
+            f"{prompt}",
+            "--ptq",
+            "16a8w",
+            "--decoder_model",
+            "qwen2_5",
+            "--model_mode",
+            "hybrid",
+            "--prefill_ar_len",
+            "32",
+            "--max_seq_len",
+            "128",
+        ]
+        if self.compile_only:
+            cmds.extend(["--compile_only"])
+        elif self.device:
+            cmds.extend(["--device", self.device])
+            if self.host:
+                cmds.extend(["--host", self.host])
+        elif self.enable_x86_64:
+            cmds.extend(["--enable_x86_64"])
+        if self.pre_gen_pte:
+            cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
+
+        # Accuracy is bad for now. Just check user's prompt is returned.
+        golden_start_with = "My favourite condiment is "
+        p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
+        with Listener((self.ip, self.port)) as listener:
+            conn = listener.accept()
+            p.communicate()
+            msg = json.loads(conn.recv())
+            if "Error" in msg:
+                self.fail(msg["Error"])
+            else:
+                model_out = msg["result"][0]
+                self.assertTrue(
+                    model_out.startswith(golden_start_with),
+                    f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
+                )
+                self.assertGreaterEqual(msg["inference_speed"], 95)  # Lanai
+
 
 class TestExampleOssScript(TestQNN):
     def test_albert(self):
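
The new test_qwen2_5 reuses the harness pattern of the llama/stories tests above: the example script is spawned with subprocess.Popen and reports its results back over a multiprocessing.connection socket that the test listens on. Below is a self-contained sketch of that round-trip, with a stand-in runner thread and an assumed free local port in place of the real llama.py subprocess:

# Stdlib-only sketch of the Listener/Client handshake the test relies on.
import json
import threading
from multiprocessing.connection import Client, Listener

ADDRESS = ("127.0.0.1", 6001)  # assumed free local port

def fake_runner():
    # Stands in for llama.py, which connects back and sends a JSON result.
    with Client(ADDRESS) as conn:
        conn.send(json.dumps({"result": ["My favourite condiment is ..."],
                              "inference_speed": 100.0}))

with Listener(ADDRESS) as listener:
    t = threading.Thread(target=fake_runner)
    t.start()
    with listener.accept() as conn:  # blocks until the runner connects
        msg = json.loads(conn.recv())
    t.join()

assert msg["result"][0].startswith("My favourite condiment is ")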

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 15 additions & 0 deletions
@@ -424,6 +424,12 @@ class ComputeGraph final {
   // Scalar Value Extraction
   //
 
+  bool is_scalar_or_none(const ValueRef idx) const {
+    const Value& value = values_.at(idx);
+    return value.isInt() || value.isDouble() || value.isBool() ||
+        value.isNone();
+  }
+
   template <typename T>
   T extract_scalar(const ValueRef idx) {
     Value& value = values_.at(idx);
@@ -439,6 +445,15 @@ class ComputeGraph final {
     VK_THROW("Cannot extract scalar from Value with type ", value.type());
   }
 
+  template <typename T>
+  T extract_scalar_or(const ValueRef idx, const T default_value) {
+    Value& value = values_.at(idx);
+    if (value.isNone()) {
+      return default_value;
+    }
+    return extract_scalar<T>(idx);
+  }
+
   template <typename T>
   std::optional<T> extract_optional_scalar(const ValueRef idx) {
     if (val_is_none(idx)) {

backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl

Lines changed: 14 additions & 5 deletions
@@ -9,6 +9,7 @@
 #version 450 core
 
 #define PRECISION ${PRECISION}
+#define UBO_PARAMS ${UBO_PARAMS}
 
 #define VEC4_T ${texel_type(DTYPE)}
 #define T ${buffer_scalar_type(DTYPE)}
@@ -22,19 +23,27 @@ layout(std430) buffer;
 ${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
 ${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}
 
-$if OP_NAME == "slice":
-  ${layout_declare_ubo(B, "int", "start")}
-  ${layout_declare_ubo(B, "int", "step")}
+$if UBO_PARAMS:
+  $if OP_NAME == "slice":
+    ${layout_declare_ubo(B, "int", "start")}
+    ${layout_declare_ubo(B, "int", "step")}
 
-$if OP_NAME == "select":
-  ${layout_declare_ubo(B, "int", "index")}
+  $if OP_NAME == "select":
+    ${layout_declare_ubo(B, "int", "index")}
 
 layout(push_constant) uniform restrict Block {
   ivec4 in_sizes;
   ivec4 out_strides;
   ivec4 in_strides;
   int out_numel;
   int selected_dim;
+  $if not UBO_PARAMS:
+    $if OP_NAME == "slice":
+      int start;
+      int step;
+
+    $if OP_NAME == "select":
+      int index;
 };
 
 ${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
