
Commit cec4576

Merge branch 'pytorch:main' into coreml_backend_recipes
Parents: a757493 + 112a09f

File tree: 143 files changed (+3067, -1194 lines). Only a subset of the changed files is shown below.

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions
@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):

     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
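The new --model_dir flag only changes where the exported .pte file lands. A small sketch of the resulting behaviour (the directory name is made up):

import tempfile

args_model_dir = "./exported_ptes"  # stand-in for args.model_dir; None reproduces the old behaviour

with tempfile.TemporaryDirectory() as tmp_dir:
    model_dir = tmp_dir if args_model_dir is None else args_model_dir
    # test_fn writes its .pte under model_dir; with --model_dir set the file
    # survives after the TemporaryDirectory is cleaned up, so the exported
    # program can be inspected or re-run outside the test.
    print(model_dir)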

.ci/scripts/test_model.sh

Lines changed: 4 additions & 1 deletion
@@ -199,6 +199,9 @@ test_model_with_qnn() {
    EXPORT_SCRIPT=albert
  elif [[ "${MODEL_NAME}" == "bert" ]]; then
    EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
+    EXTRA_FLAGS="--dataset imagenet-mini/val"
  elif [[ "${MODEL_NAME}" == "cvt" ]]; then
    EXPORT_SCRIPT=cvt
  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
    "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
      SCRIPT_FOLDER=oss_scripts
      ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
      pip install evaluate
      SCRIPT_FOLDER=oss_scripts
      # 16bit models will encounter op validation fail on some operations,

.github/workflows/trunk.yml

Lines changed: 4 additions & 1 deletion
@@ -568,7 +568,7 @@ jobs:
    strategy:
      matrix:
        dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
      fail-fast: false
    with:
      runner: linux.2xlarge
@@ -815,6 +815,9 @@ jobs:
          smollm|coreml_fp32_gpu|--quantize,
          llama3|coreml_fp32_gpu|--quantize,
          olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
        ]
      fail-fast: false
    with:

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 41 additions & 1 deletion
@@ -8,6 +8,7 @@
 # coremltools than is used by ExecuTorch. Each op registered here should have a link to a PR in coremltools that adds
 # the op to the coremltools library.

+import numpy as np
 import torch as _torch
 from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
@@ -21,7 +22,6 @@
     transpose,
     unbind,
 )
-
 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
@@ -132,3 +132,43 @@ def dequantize_affine(context, node):
         name=node.name,
     )
     context.add(output, node.name)
+
+
+@register_torch_op(
+    torch_alias=["quant::dequantize_codebook", "quant.dequantize_codebook"],
+    override=False,
+)
+def dequantize_codebook(context, node):
+    inputs = _get_inputs(context, node, expected=[4, 5])
+    codes = inputs[0].val
+    codebook = inputs[1].val
+    nbits = inputs[2].val
+
+    # information in block_size is redundant with codebook.shape
+    block_size = inputs[3].val  # noqa: F841
+
+    assert len(codes.shape) == 2, "Only rank 2 inputs are supported"
+
+    # Assert codebook is as expected. codebook.dim() = codes.dim() + 2
+    assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
+    assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
+    n_luts = codebook.shape[1]
+    assert (
+        codes.shape[1] % n_luts == 0
+    ), "codes.shape[1] must be divisible by codebook.shape[1]"
+    assert codebook.shape[2] == 2**nbits
+    assert codebook.shape[3] == 1, "Only scalar look up values are supported"
+
+    if len(inputs) > 4:
+        output_dtype = inputs[4].val
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[output_dtype]
+        _logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+        )
+
+    output = _utils._construct_constexpr_lut_op(
+        codes.astype(np.int8),
+        codebook,
+        name=node.name,
+    )
+    context.add(output, node.name)
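For orientation, here is a minimal sketch of inputs that satisfy the shape checks in dequantize_codebook above. The sizes are made up for illustration; only the layout (rank-2 codes, rank-4 codebook of shape [1, n_luts, 2**nbits, 1]) comes from the asserts in the diff:

import numpy as np

nbits, n_luts = 2, 4
codes = np.random.randint(0, 2**nbits, size=(8, 16), dtype=np.int8)    # rank-2 table of LUT indices
codebook = np.random.randn(1, n_luts, 2**nbits, 1).astype(np.float16)  # one scalar value per code
assert len(codes.shape) == 2 and len(codebook.shape) == 4
assert codes.shape[1] % n_luts == 0
assert codebook.shape[2] == 2**nbits and codebook.shape[3] == 1

Each contiguous group of codes.shape[1] // n_luts columns presumably shares one lookup table, which is what the "grouped_channel granularity" assert refers to.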

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 8 additions & 6 deletions
@@ -88,17 +88,17 @@
         ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
         return std::nullopt;
     }
-
+
     std::vector<ssize_t> strides(tensor.strides().begin(), tensor.strides().end());
     std::vector<size_t> shape(tensor.sizes().begin(), tensor.sizes().end());
-
+
     // If tensor is rank 0, wrap in rank 1
     // See https://github.com/apple/coremltools/blob/8.2/coremltools/converters/mil/frontend/torch/exir_utils.py#L73
     if (shape.size() == 0) {
         shape.push_back(1);
         strides.push_back(1);
     }
-
+
     MultiArray::MemoryLayout layout(dataType.value(), std::move(shape), std::move(strides));
     switch (argType) {
         case ArgType::Input: {
@@ -281,9 +281,11 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
 }

 namespace {
-auto cls = CoreMLBackendDelegate();
-Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
-static auto success_with_compiler = register_backend(backend);
+#ifndef LAZY_LOAD_IOS_PYTORCH_INITIALIZER
+auto cls = CoreMLBackendDelegate();
+Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
+static auto success_with_compiler = register_backend(backend);
+#endif
 }

 } // namespace coreml
New file (path not shown in this view)

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+#pragma once
+
+namespace executorch::core_ml_backend_delegate {
+void register_backend_coreml();
+} // namespace executorch::core_ml_backend_delegate
New file (path not shown in this view)

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "executorch_operations.h"
+#import <coreml_backend/delegate.h>
+#import "ETCoreMLStrings.h"
+#import "backend_delegate.h"
+
+#import <executorch/runtime/core/evalue.h>
+#import <executorch/runtime/platform/log.h>
+#import <executorch/runtime/backend/interface.h>
+
+#include <array>
+#import <memory>
+
+namespace executorch::core_ml_backend_delegate {
+using executorch::runtime::get_backend_class;
+
+static std::unique_ptr<executorch::backends::coreml::CoreMLBackendDelegate> backendInterfaceLazy_;
+
+void register_backend_coreml() {
+    auto backendInterface = executorch::runtime::get_backend_class(ETCoreMLStrings.delegateIdentifier.UTF8String);
+    if (backendInterface == nullptr) {
+        backendInterfaceLazy_ = std::make_unique<executorch::backends::coreml::CoreMLBackendDelegate>();
+        executorch::runtime::Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, backendInterfaceLazy_.get()};
+        std::ignore = register_backend(backend);
+    }
+}
+
+} // namespace executorch::core_ml_backend_delegate

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 60 additions & 0 deletions
@@ -14,6 +14,9 @@

 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from executorch.exir.backend.utils import format_delegated_graph
+
+from torchao.prototype.quantization.codebook_coreml import CodebookWeightOnlyConfig
 from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_


@@ -164,6 +167,61 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)

+    def test_dequantize_codebook_linear(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            CodebookWeightOnlyConfig(dtype=torch.uint2, block_size=[-1, 16]),
+        )
+        ep = torch.export.export(model, example_inputs)
+        assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+        assert (
+            "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+            in format_delegated_graph(delegated_program.exported_program().graph_module)
+        )
+
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
+    def test_dequantize_codebook_embedding(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            CodebookWeightOnlyConfig(dtype=torch.uint3, block_size=[-1, 16]),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        ep = torch.export.export(model, example_inputs)
+        assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+        assert (
+            "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+            in format_delegated_graph(delegated_program.exported_program().graph_module)
+        )
+
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+

 if __name__ == "__main__":
     test_runner = TestTorchOps()
@@ -172,3 +230,5 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
     test_runner.test_dequantize_affine_c4w_embedding()
     test_runner.test_dequantize_affine_c4w_linear()
     test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_codebook_linear()
+    test_runner.test_dequantize_codebook_embedding()
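Outside the test harness, the codebook path exercised above reduces to the torchao and export calls below. A hedged sketch: the toy Linear model and its sizes are made up, and the test class helpers (_get_test_model, _coreml_partitioner, _compare_outputs) are not reproduced here.

import torch
from torchao.prototype.quantization.codebook_coreml import CodebookWeightOnlyConfig
from torchao.quantization import quantize_

# Toy stand-in for the test's _get_test_model(); the feature size of 64 is
# chosen only so that the 16-wide blocks divide it evenly.
model = torch.nn.Sequential(torch.nn.Linear(64, 32))
example_inputs = (torch.randn(1, 64),)

# Replace the Linear weight with uint2 codebook-quantized values.
quantize_(model, CodebookWeightOnlyConfig(dtype=torch.uint2, block_size=[-1, 16]))

# After export, the graph should carry the codebook dequant op that the
# Core ML registration in torch_ops.py lowers to a constexpr LUT.
ep = torch.export.export(model, example_inputs)
assert "dequantize_codebook" in ep.graph_module.code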

backends/arm/arm_backend.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def vgf_compile_spec(
             f"Invalid TOSA version: {tosa_version}"
         )

-    if not ("FP" or "INT" in tosa_profiles):
+    if "FP" not in tosa_profiles and "INT" not in tosa_profiles:
         raise ValueError(
             "Arm backend only supports converter-backend for FP or INT. "
             f"Invalid TOSA profile: {tosa_profiles}"

backends/arm/quantizer/arm_quantizer.py

Lines changed: 29 additions & 3 deletions
@@ -14,7 +14,7 @@
 from __future__ import annotations

 import functools
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 import torch
 from executorch.backends.arm._passes import ArmPassManager
@@ -218,9 +218,35 @@ def not_module_type_or_name_filter(n: Node) -> bool:

 class TOSAQuantizer(Quantizer):

-    def __init__(self, tosa_spec: TosaSpecification) -> None:
+    def __init__(
+        self, compile_spec_or_tosa_spec: Union[TosaSpecification, List[CompileSpec]]
+    ) -> None:
+
         super().__init__()
-        self.tosa_spec = tosa_spec
+        if isinstance(compile_spec_or_tosa_spec, TosaSpecification):
+            self.tosa_spec = compile_spec_or_tosa_spec
+            self.compile_spec = None
+        elif isinstance(compile_spec_or_tosa_spec, list):
+            self.compile_spec = compile_spec_or_tosa_spec
+            # find entry that is 'tosa_spec'
+            for cs in compile_spec_or_tosa_spec:
+                if cs.key == "tosa_spec":
+                    spec_val = (
+                        cs.value.decode() if isinstance(cs.value, bytes) else cs.value
+                    )
+                    self.tosa_spec = TosaSpecification.create_from_string(spec_val)
+                    break
+            else:
+                raise ValueError(
+                    "compile_spec list did not contain a 'tosa_spec' entry"
+                )
+        else:
+            raise TypeError(
+                f"TOSAQuantizer constructor expects "
+                f"a TosaSpecification or compile_spec list, "
+                f"got {type(compile_spec_or_tosa_spec)}"
+            )
+
         self.global_config: Optional[QuantizationConfig] = None
         self.io_config: Optional[QuantizationConfig] = None
         self.module_type_config: Dict[Callable, Optional[QuantizationConfig]] = {}
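With this change TOSAQuantizer accepts either a TosaSpecification or a compile-spec list carrying a 'tosa_spec' entry. A minimal sketch of the two constructor forms; the import paths and the TOSA spec string are assumptions for illustration, not taken from this diff:

from executorch.backends.arm.quantizer.arm_quantizer import TOSAQuantizer
from executorch.backends.arm.tosa_specification import TosaSpecification
from executorch.exir.backend.compile_spec_schema import CompileSpec

# 1) Existing form: pass a TosaSpecification directly.
quantizer_a = TOSAQuantizer(TosaSpecification.create_from_string("TOSA-1.0+INT"))

# 2) New form: pass a compile-spec list; the quantizer locates the 'tosa_spec'
#    entry, decodes its bytes value, and parses the spec string itself.
compile_spec = [CompileSpec("tosa_spec", b"TOSA-1.0+INT")]
quantizer_b = TOSAQuantizer(compile_spec)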
