
Commit 8505bbb

Update base for Update on "PT2ArchiveDataMap"
This diff introduces the PT2ArchiveDataMap, which reads the weights in .pt2 archive files.

1. Open the archive file with miniz.
2. There are two JSON config files (weights_config.json, constants_config.json) mapping weight name -> {weight path, weight metadata}. Open and extract the weight information into unordered_maps, then free the JSON blobs.
3. For get_tensor_layout calls, return the JSON information.
4. For get_data calls, use miniz to calculate the offset + size and then use the data loader. PT2 archive files are not compressed (to allow mmap-ing), so this is fine.

PT2 archive format: https://docs.google.com/document/d/1xdx3I4zK6naPEWX3e49rCUccZeAC9zMLCFKXvUQFR7o/edit?tab=t.0
Serde: https://docs.google.com/document/d/11X-KsLPMJGdEr4sG4sCNLnGLhSKrc8utDGMQqFbZx9E/edit?tab=t.0#heading=h.tsw6d16xh497

TODO in subsequent diffs:
- Convert stride to dim order.
- Additional testing: failure cases, a model file with constants as well as weights, a model with no weights.
- CMake for OSS.

Differential Revision: [D81248896](https://our.internmc.facebook.com/intern/diff/D81248896/)

[ghstack-poisoned]
2 parents: 8c79a53 + 151f3be
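The commit message above describes a four-step lookup flow. Below is an illustrative Python sketch of that flow, not the actual C++ PT2ArchiveDataMap (which uses miniz and an ExecuTorch data loader); only the two config file names come from the commit message, while the class name and the "path"/"metadata" JSON fields are assumptions for illustration.

import json
import zipfile


class ArchiveWeightIndex:
    """Hypothetical stand-in for PT2ArchiveDataMap, for illustration only."""

    def __init__(self, archive_path: str) -> None:
        # Step 1: open the .pt2 archive (a zip container).
        self._zf = zipfile.ZipFile(archive_path)
        # Step 2: parse the weight/constant configs into one name -> entry map,
        # then drop the raw JSON blobs.
        self._entries = {}
        for member in self._zf.namelist():
            if member.endswith(("weights_config.json", "constants_config.json")):
                self._entries.update(json.loads(self._zf.read(member)))

    def get_tensor_layout(self, weight_name: str):
        # Step 3: layout queries are answered from the parsed metadata
        # ("metadata" is an assumed field name).
        return self._entries[weight_name]["metadata"]

    def get_data(self, weight_name: str) -> bytes:
        # Step 4: entries are stored uncompressed, so the payload can be read
        # (or mmap'd) directly at its offset inside the archive.
        info = self._zf.getinfo(self._entries[weight_name]["path"])
        assert info.compress_type == zipfile.ZIP_STORED
        with self._zf.open(info) as f:
            return f.read()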


444 files changed: +20895 -2645 lines changed


.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ test_model() {
 bash examples/models/llava/install_requirements.sh
 STRICT="--no-strict"
 fi
-if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
 # Install requirements for export_llama
 bash examples/models/llama/install_requirements.sh
 # Test export_llm script: python3 -m extension.llm.export.export_llm.

.github/workflows/pull.yml

Lines changed: 7 additions & 0 deletions
@@ -929,7 +929,14 @@ jobs:
 CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
 .ci/scripts/setup-linux.sh --build-tool "cmake"

+# Custom operator tests
 PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
+./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
+./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
+
+# Run e2e testing for selected operators. More operators will be tested via this
+# route in the future.
+python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"

 nxp-build-test:
 name: nxp-build-test

.github/workflows/trunk.yml

Lines changed: 4 additions & 4 deletions
@@ -176,7 +176,7 @@ jobs:
 - model: phi_4_mini
 backend: portable
 runner: linux.arm64.m7g.4xlarge
-- model: qwen2_5
+- model: qwen2_5_1_5b
 backend: portable
 runner: linux.arm64.2xlarge
 - model: llama3_2_vision_encoder
@@ -823,10 +823,10 @@ jobs:
 --tsv_path ${TSV_PATH}
 echo "::endgroup::"

-test-huggingface-transformers-coreml:
+test-huggingface-transformers-macos:
 # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
 if: ${{ !github.event.pull_request.head.repo.fork }}
-name: test-huggingface-transformers-coreml
+name: test-huggingface-transformers-macos
 uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
 permissions:
 id-token: write
@@ -844,10 +844,10 @@ jobs:
 # phi4-mini|xnnpack|--quantize,
 # smollm2-135m|xnnpack|--quantize,
 # smollm3-3b|xnnpack|--quantize,
+# qwen3-1.7b|xnnpack|--quantize,
 # CoreML.
 llama3.2-1b|coreml_fp32_gpu|--quantize,
 qwen3-0.6b|coreml_fp32_gpu|--quantize,
-qwen3-1.7b|xnnpack|--quantize,
 smollm2-135m|coreml_fp32_gpu|--quantize,
 olmo-1b|coreml_fp32_gpu|--quantize,
 bert|coreml_fp32_gpu|--quantize,

CMakeLists.txt

Lines changed: 1 addition & 3 deletions
@@ -699,9 +699,7 @@ if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
 ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
 ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
 )
-add_subdirectory(
-${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
-)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/csrc/cpu)
 unset(EXECUTORCH_INCLUDE_DIRS)

 executorch_target_link_options_shared_lib(torchao_ops_executorch)

README.md

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).

 ## Feedback and Engagement

backends/apple/coreml/TARGETS

Lines changed: 24 additions & 5 deletions
@@ -61,16 +61,21 @@ runtime.python_library(
 )

 runtime.python_library(
-name = "recipes",
-srcs = glob([
-"recipes/*.py",
-]),
+name = "coreml_recipes",
+srcs = [
+"recipes/__init__.py",
+"recipes/coreml_recipe_provider.py"
+],
 visibility = [
 "@EXECUTORCH_CLIENTS",
+"//executorch/export/...",
 ],
 deps = [
 "fbsource//third-party/pypi/coremltools:coremltools",
+":coreml_recipe_types",
 ":backend",
+":partitioner",
+":quantizer",
 "//caffe2:torch",
 "//executorch/exir:lib",
 "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
 ],
 )

+runtime.python_library(
+name = "coreml_recipe_types",
+srcs = [
+"recipes/coreml_recipe_types.py",
+],
+visibility = [
+"@EXECUTORCH_CLIENTS",
+"//executorch/export/...",
+],
+deps = [
+"//executorch/export:recipe",
+],
+)
+
 runtime.cxx_python_extension(
 name = "executorchcoreml",
 srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
 "fbsource//third-party/pypi/pytest:pytest",
 ":partitioner",
 ":quantizer",
-":recipes",
+":coreml_recipes",
 "//caffe2:torch",
 "//pytorch/vision:torchvision",
 "fbsource//third-party/pypi/scikit-learn:scikit-learn",

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 23 additions & 1 deletion
@@ -15,6 +15,7 @@
 from coremltools.converters.mil.frontend.torch.ops import (
 _get_inputs,
 _get_kwinputs,
+noop,
 NUM_TO_NUMPY_DTYPE,
 NUM_TO_TORCH_DTYPE,
 split,
@@ -91,6 +92,28 @@ def _to_dim_order_copy(context, node):
 to(context, node)


+@register_torch_op(
+torch_alias=[
+"dim_order_ops::_clone_dim_order",
+"dim_order_ops._clone_dim_order",
+],
+override=False,
+)
+def _clone_dim_order(context, node):
+dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+node.kwinputs.pop("dim_order")
+
+# In CoreML, dim_order.val will be a ndarray, so we convert it to a list to check memory format.
+dim_order = [int(d) for d in dim_order.val]
+memory_format = get_memory_format(dim_order)
+assert (
+memory_format == _torch.contiguous_format
+), "Only contiguous memory format is supported in CoreML"
+
+# Since CoreML only supports contiguous format, no dim_order preservation is needed. Treat this as a no-op clone.
+noop(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
 torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
@@ -152,7 +175,6 @@ def dequantize_affine(context, node):
 int_data.astype(quantized_np_dtype),
 zero_point,
 scale,
-axis=-1,
 name=node.name,
 )
 context.add(output, node.name)
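The new _clone_dim_order registration above only admits clones whose dim_order is contiguous; anything else trips the assert. A minimal standalone sketch of that check (is_contiguous_dim_order is a hypothetical helper standing in for the get_memory_format(...) == _torch.contiguous_format comparison):

def is_contiguous_dim_order(dim_order) -> bool:
    # A contiguous (NCHW-style) dim order is simply 0, 1, ..., rank-1.
    return list(dim_order) == list(range(len(dim_order)))


assert is_contiguous_dim_order([0, 1, 2, 3])      # contiguous clone: treated as a no-op
assert not is_contiguous_dim_order([0, 2, 3, 1])  # channels-last: rejected by the converter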

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 21 additions & 3 deletions
@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.


+import logging
 from typing import Any, Optional, Sequence

 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non

 unexpected = set(kwargs.keys()) - expected_keys
 if unexpected:
-raise ValueError(
-f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+logging.warning(
+f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+f"Expected parameters: {list(expected_keys)}"
 )

 self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non

 def _get_expected_keys(self, recipe_type: RecipeType) -> set:
 """Get expected parameter keys for a recipe type"""
-common_keys = {"minimum_deployment_target", "compute_unit"}
+common_keys = {
+"minimum_deployment_target",
+"compute_unit",
+"skip_ops_for_coreml_delegation",
+"lower_full_graph",
+"take_over_constant_data",
+}

 if recipe_type in [
 CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
 if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
 take_over_mutable_buffer = False

+# Extract additional partitioner parameters
+skip_ops_for_coreml_delegation = kwargs.get(
+"skip_ops_for_coreml_delegation", None
+)
+lower_full_graph = kwargs.get("lower_full_graph", False)
+take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
 partitioner = CoreMLPartitioner(
 compile_specs=compile_specs,
 take_over_mutable_buffer=take_over_mutable_buffer,
+skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+lower_full_graph=lower_full_graph,
+take_over_constant_data=take_over_constant_data,
 )

 edge_compile_config = EdgeCompileConfig(
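With this change, the three partitioner options flow from recipe kwargs into CoreMLPartitioner, and unknown kwargs now log a warning instead of raising. A hedged usage sketch (the kwarg names and CoreMLRecipeType.PT2E_INT8_STATIC appear in this diff; the provider class name and import paths are assumptions based on the file layout):

from executorch.backends.apple.coreml.recipes.coreml_recipe_provider import (
    CoreMLRecipeProvider,  # assumed class name
)
from executorch.backends.apple.coreml.recipes.coreml_recipe_types import (
    CoreMLRecipeType,
)

provider = CoreMLRecipeProvider()
recipe = provider.create_recipe(
    CoreMLRecipeType.PT2E_INT8_STATIC,
    skip_ops_for_coreml_delegation=None,   # new pass-through kwarg
    lower_full_graph=False,                # new pass-through kwarg
    take_over_constant_data=True,          # new pass-through kwarg
    unknown_option=123,                    # now only logs a warning, no ValueError
)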

backends/apple/coreml/test/test_coreml_recipes.py

Lines changed: 0 additions & 25 deletions
@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
 )
 self.assertIn("must be positive", str(cm.exception))

-# Test unexpected parameter
-with self.assertRaises(ValueError) as cm:
-self.provider.create_recipe(
-CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-group_size=32, # group_size not valid for per-channel
-)
-self.assertIn("unexpected parameters", str(cm.exception))
-
 def test_int8_weight_only_per_channel(self):
 """Test INT8 weight-only per-channel quantization"""
 model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
 self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
 self._compare_eager_unquantized_model_outputs(session, model, example_inputs)

-def test_pt2e_recipes_parameter_rejection(self):
-"""Test that PT2E recipes reject TorchAO-specific parameters"""
-# PT2E recipes should reject TorchAO-specific parameters
-pt2e_recipes = [
-CoreMLRecipeType.PT2E_INT8_STATIC,
-CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-]
-torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-for recipe_type in pt2e_recipes:
-for param in torchao_params:
-with self.subTest(recipe=recipe_type.value, param=param):
-kwargs = {param: "dummy_value"}
-with self.assertRaises(ValueError) as cm:
-self.provider.create_recipe(recipe_type, **kwargs)
-self.assertIn("unexpected parameters", str(cm.exception).lower())
-
 def test_filter_fn_comprehensive(self):
 """Comprehensive test for filter_fn parameter functionality"""

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 53 additions & 2 deletions
@@ -27,9 +27,9 @@
 class TestTorchOps(unittest.TestCase):
 edge_compile_config = executorch.exir.EdgeCompileConfig()

-def _coreml_partitioner(self):
+def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
 compile_specs = CoreMLBackend.generate_compile_specs(
-minimum_deployment_target=ct.target.iOS18
+minimum_deployment_target=minimum_deployment_target
 )
 return CoreMLPartitioner(compile_specs=compile_specs)

@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
 et_prog = delegated_program.to_executorch()
 self._compare_outputs(et_prog, model, example_inputs)

+def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
+model, example_inputs = self._get_test_model()
+quantize_(
+model,
+IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+lambda m, fqn: isinstance(m, torch.nn.Embedding),
+)
+quantize_(
+model,
+IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+)
+ep = torch.export.export(model, example_inputs)
+delegated_program = executorch.exir.to_edge_transform_and_lower(
+ep,
+partitioner=[
+self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
+],
+)
+for node in delegated_program.exported_program().graph.nodes:
+if node.op == "call_function":
+assert node.target.__name__ in [
+"executorch_call_delegate",
+"getitem",
+], f"Got unexpected node target after delegation: {node.target.__name__}"
+et_prog = delegated_program.to_executorch()
+self._compare_outputs(et_prog, model, example_inputs)
+
 def test_dequantize_codebook_linear_per_grouped_col(self):
 model, example_inputs = self._get_test_model()
 quantize_(
@@ -268,6 +295,28 @@ def test_dequantize_codebook_embedding_per_grouped_row(self):
 et_prog = delegated_program.to_executorch()
 self._compare_outputs(et_prog, model, example_inputs)

+def test__clone_dim_order_contiguous(self):
+class Model(torch.nn.Module):
+def forward(self, x):
+return torch.ops.dim_order_ops._clone_dim_order(
+x, dim_order=[0, 1, 2, 3]
+)
+
+model, example_inputs = Model(), (torch.randn(1, 3, 8, 8),)
+ep = torch.export.export(model, example_inputs)
+delegated_program = executorch.exir.to_edge_transform_and_lower(
+ep,
+partitioner=[self._coreml_partitioner()],
+)
+for node in delegated_program.exported_program().graph.nodes:
+if node.op == "call_function":
+assert node.target.__name__ in [
+"executorch_call_delegate",
+"getitem",
+], f"Got unexpected node target after delegation: {node.target.__name__}"
+et_prog = delegated_program.to_executorch()
+self._compare_outputs(et_prog, model, example_inputs)
+

 if __name__ == "__main__":
 test_runner = TestTorchOps()
@@ -276,7 +325,9 @@ def test_dequantize_codebook_embedding_per_grouped_row(self):
 test_runner.test_dequantize_affine_c4w_embedding()
 test_runner.test_dequantize_affine_c4w_linear()
 test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
 test_runner.test_dequantize_codebook_linear_per_grouped_col()
 test_runner.test_dequantize_codebook_linear_per_grouped_row()
 test_runner.test_dequantize_codebook_embedding_per_grouped_col()
 test_runner.test_dequantize_codebook_embedding_per_grouped_row()
+test_runner.test__clone_dim_order_contiguous()
