Skip to content

Commit c46ef87

Browse files
author
ssjia
committed
Update on "[ET-VK] Implement linear_q4gsw"
As title. Extend the quantized linear implementation to be able to handle 4-bit per group symmetrically quantized weights. This is in preparation to support using the int8 dot product extension to be able to handle dynamically quantized inputs. Differential Revision: [D81800023](https://our.internmc.facebook.com/intern/diff/D81800023/) [ghstack-poisoned]
2 parents 9cc8a5c + be1ab37 commit c46ef87

File tree

90 files changed

+1722
-743
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+1722
-743
lines changed

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ test_model() {
9797
bash examples/models/llava/install_requirements.sh
9898
STRICT="--no-strict"
9999
fi
100-
if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
100+
if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
101101
# Install requirements for export_llama
102102
bash examples/models/llama/install_requirements.sh
103103
# Test export_llm script: python3 -m extension.llm.export.export_llm.

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ jobs:
176176
- model: phi_4_mini
177177
backend: portable
178178
runner: linux.arm64.m7g.4xlarge
179-
- model: qwen2_5
179+
- model: qwen2_5_1_5b
180180
backend: portable
181181
runner: linux.arm64.2xlarge
182182
- model: llama3_2_vision_encoder

CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,7 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
699699
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
700700
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
701701
)
702-
add_subdirectory(
703-
${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
704-
)
702+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/csrc/cpu)
705703
unset(EXECUTORCH_INCLUDE_DIRS)
706704

707705
executorch_target_link_options_shared_lib(torchao_ops_executorch)

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
175175
int_data.astype(quantized_np_dtype),
176176
zero_point,
177177
scale,
178-
axis=-1,
179178
name=node.name,
180179
)
181180
context.add(output, node.name)

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
class TestTorchOps(unittest.TestCase):
2828
edge_compile_config = executorch.exir.EdgeCompileConfig()
2929

30-
def _coreml_partitioner(self):
30+
def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
3131
compile_specs = CoreMLBackend.generate_compile_specs(
32-
minimum_deployment_target=ct.target.iOS18
32+
minimum_deployment_target=minimum_deployment_target
3333
)
3434
return CoreMLPartitioner(compile_specs=compile_specs)
3535

@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
158158
et_prog = delegated_program.to_executorch()
159159
self._compare_outputs(et_prog, model, example_inputs)
160160

161+
def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
162+
model, example_inputs = self._get_test_model()
163+
quantize_(
164+
model,
165+
IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
166+
lambda m, fqn: isinstance(m, torch.nn.Embedding),
167+
)
168+
quantize_(
169+
model,
170+
IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
171+
)
172+
ep = torch.export.export(model, example_inputs)
173+
delegated_program = executorch.exir.to_edge_transform_and_lower(
174+
ep,
175+
partitioner=[
176+
self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
177+
],
178+
)
179+
for node in delegated_program.exported_program().graph.nodes:
180+
if node.op == "call_function":
181+
assert node.target.__name__ in [
182+
"executorch_call_delegate",
183+
"getitem",
184+
], f"Got unexpected node target after delegation: {node.target.__name__}"
185+
et_prog = delegated_program.to_executorch()
186+
self._compare_outputs(et_prog, model, example_inputs)
187+
161188
def test_dequantize_codebook_linear_per_grouped_col(self):
162189
model, example_inputs = self._get_test_model()
163190
quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
298325
test_runner.test_dequantize_affine_c4w_embedding()
299326
test_runner.test_dequantize_affine_c4w_linear()
300327
test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
328+
test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
301329
test_runner.test_dequantize_codebook_linear_per_grouped_col()
302330
test_runner.test_dequantize_codebook_linear_per_grouped_row()
303331
test_runner.test_dequantize_codebook_embedding_per_grouped_col()

backends/arm/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ For more information on TOSA see https://www.mlplatform.org/tosa/tosa_spec.html
3434
## Layout of key components
3535

3636
Export:
37-
* `tosa_backend.py` - The TOSA conversion flow all other backends rely on.
37+
* `tosa/backend.py` - The TOSA conversion flow all other backends rely on.
3838
* `ethosu/backend.py` - Main entrypoint for the EthosUBackend.
3939
* `vgf_backend.py` - Main entrypoint for VgfBackend.
4040
* For more information see the section on [Arm Backend Architecture](#arm-backend-architecture).

backends/arm/TARGETS

Lines changed: 6 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,15 @@ python_library(
3737
python_library(
3838
name = "arm_partitioner",
3939
srcs = [
40-
"tosa_backend.py",
41-
"tosa_partitioner.py",
40+
"tosa/backend.py",
41+
"tosa/partitioner.py",
4242
"vgf_backend.py",
4343
"vgf_partitioner.py",
4444
],
4545
deps = [
4646
":arm_backend",
4747
":constants",
48+
"//executorch/backends/arm/debug:schema",
4849
"//executorch/backends/arm/operator_support:operator_support",
4950
"//executorch/backends/arm/_passes:passes",
5051
"//executorch/exir:lib",
@@ -76,9 +77,9 @@ python_library(
7677
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
7778
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
7879
"//executorch/backends/arm/operators:node_visitor",
79-
"//executorch/backends/arm:tosa_mapping",
80-
"//executorch/backends/arm:tosa_quant_utils",
81-
"//executorch/backends/arm:tosa_utils",
80+
"//executorch/backends/arm/tosa:mapping",
81+
"//executorch/backends/arm/tosa:quant_utils",
82+
"//executorch/backends/arm/tosa:utils",
8283
"//executorch/exir:lib",
8384
],
8485
)
@@ -91,54 +92,6 @@ python_library(
9192
"fbsource//third-party/pypi/ethos-u-vela:ethos-u-vela",
9293
],
9394
)
94-
python_library(
95-
name = "tosa_mapping",
96-
srcs = [
97-
"tosa_mapping.py",
98-
],
99-
deps = [
100-
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
101-
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
102-
"//caffe2:torch",
103-
],
104-
)
105-
python_library(
106-
name = "tosa_quant_utils",
107-
srcs = [
108-
"tosa_quant_utils.py",
109-
],
110-
deps = [
111-
"fbsource//third-party/pypi/numpy:numpy",
112-
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
113-
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
114-
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
115-
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
116-
":constants",
117-
":tosa_mapping",
118-
"//executorch/exir/dialects:lib",
119-
],
120-
)
121-
python_library(
122-
name = "tosa_specification",
123-
srcs = [
124-
"tosa_specification.py",
125-
],
126-
deps = [
127-
"fbsource//third-party/pypi/packaging:packaging",
128-
"//executorch/exir/backend:compile_spec_schema",
129-
],
130-
)
131-
python_library(
132-
name = "tosa_utils",
133-
srcs = [
134-
"tosa_utils.py",
135-
],
136-
deps = [
137-
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
138-
":tosa_quant_utils",
139-
"//executorch/backends/arm/operators:node_visitor",
140-
],
141-
)
14295
python_library(
14396
name = "arm_model_evaluator",
14497
srcs = [

backends/arm/_passes/TARGETS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ python_library(
66
deps = [
77
"//executorch/backends/arm:common",
88
"//executorch/backends/arm:constants",
9-
"//executorch/backends/arm:tosa_quant_utils",
10-
"//executorch/backends/arm:tosa_utils",
9+
"//executorch/backends/arm/tosa:quant_utils",
10+
"//executorch/backends/arm/tosa:utils",
1111
"//executorch/backends/arm/tosa/dialect:lib",
1212
"//executorch/backends/transforms:fuse_view_copy",
1313
"//executorch/backends/transforms:remove_getitem_op",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
3838
from .decompose_cumsum_pass import DecomposeCumsumPass # noqa
3939
from .decompose_div_pass import DecomposeDivPass # noqa
40+
from .decompose_div_tensor_mode import DecomposeDivTensorModePass # noqa
4041
from .decompose_elu_pass import DecomposeEluPass # noqa
4142
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
4243
from .decompose_expm1_pass import DecomposeExpm1Pass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
DecomposeCosineSimilarityPass,
4343
DecomposeCumsumPass,
4444
DecomposeDivPass,
45+
DecomposeDivTensorModePass,
4546
DecomposeEluPass,
4647
DecomposeEmbeddingPass,
4748
DecomposeExpm1Pass,
@@ -211,6 +212,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
211212
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
212213
)
213214
self.add_pass(DecomposeNotEqualPass())
215+
self.add_pass(DecomposeDivTensorModePass())
214216
self.add_pass(DecomposeDivPass())
215217
self.add_pass(DecomposeSoftmaxPass())
216218
self.add_pass(DecomposeGeluPass())
@@ -289,6 +291,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
289291
self.add_pass(DecomposeNotEqualPass())
290292
self.add_pass(DecomposeCosineSimilarityPass())
291293
self.add_pass(DecomposeGluPass())
294+
self.add_pass(DecomposeDivTensorModePass())
292295
self.add_pass(DecomposeDivPass())
293296
self.add_pass(DecomposeLeakyReLUPass())
294297
self.add_pass(DecomposeLinearVectorNormPass())

0 commit comments

Comments
 (0)