# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This file registers torch ops that are not yet available in coremltools, or
# that ship in a newer coremltools release than the one used by ExecuTorch.
# Each op registered here should link to the coremltools PR that adds it upstream.

import torch as _torch
from coremltools import _logger as logger
from coremltools.converters.mil.frontend import _utils
from coremltools.converters.mil.frontend.torch.ops import (
    _get_inputs,
    NUM_TO_NUMPY_DTYPE,
    NUM_TO_TORCH_DTYPE,
    transpose,
    unbind,
)

from coremltools.converters.mil.frontend.torch.torch_op_registry import (
    register_torch_op,
)
from coremltools.converters.mil.mil import types


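# The *_copy ops below are the functional (out-of-place) variants produced by
# torch.export's functionalization; they lower exactly like their non-copy
# counterparts, so we delegate to the existing coremltools translators.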
# https://github.com/apple/coremltools/pull/2556
@register_torch_op(override=False)
def transpose_copy(context, node):
    transpose(context, node)


# https://github.com/apple/coremltools/pull/2557
@register_torch_op(override=False)
def unbind_copy(context, node):
    unbind(context, node)


# https://github.com/apple/coremltools/pull/2558
@register_torch_op(
    torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
    override=False,
)
def dequantize_affine(context, node):
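    """Lower torchao's dequantize_affine to a Core ML constexpr dequantize op.

    Folds the quantized weight, scale, and optional zero point into a single
    compile-time constant op so Core ML can keep the weight compressed.
    """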
    inputs = _get_inputs(context, node, expected=[7, 8])
    int_data = inputs[0].val
    block_size = inputs[1].val
    scale = inputs[2].val
    zero_point = (
        inputs[3].val if inputs[3] is not None and inputs[3].val is not None else None
    )
    # input_dtype does not affect the lowering because the data is re-cast to
    # int4/int8 below; we only validate that it is int8 or int32.
    input_dtype = inputs[4].val
    assert NUM_TO_TORCH_DTYPE[input_dtype] in [
        _torch.int8,
        _torch.int32,
    ], "input_dtype should be int8 or int32"

    quant_min = inputs[5].val
    quant_max = inputs[6].val

    assert len(int_data.shape) == 2, "dequantize_affine only supports rank 2 inputs"

    assert len(int_data.shape) == len(
        block_size
    ), "block_size must have the same length as int_data.shape"
    assert block_size[0] == 1, "block_size[0] must be 1"
    group_size = block_size[1]
    k = int_data.shape[1]
    assert k % group_size == 0, "k must be divisible by group_size"
    scales_per_row = k // group_size
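    # Normalize scale (and zero_point) to shape (n, k // group_size): weight
    # column j then takes its parameters from group column j // group_size.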
    scale = scale.reshape(-1, scales_per_row)
    if zero_point is not None:
        zero_point = zero_point.reshape(-1, scales_per_row)

    # TODO: it is unclear whether Core ML can make use of the output dtype.
    # We could add a cast op on the output, but Core ML would likely remove it
    # during a later optimization pass. For now, we just log a warning.
    out_np_dtype = None
    if len(inputs) > 7:
        out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
        logger.warning(
            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
        )

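    # Map the (quant_min, quant_max) range onto a Core ML storage dtype:
    # [-8, 7] is the signed 4-bit range and [-128, 127] the signed 8-bit range.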
    if quant_min == -8 and quant_max == 7:
        quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int4"))
    elif quant_min == -128 and quant_max == 127:
        quantized_np_dtype = types.nptype_from_builtin(types.string_to_builtin("int8"))
    else:
        raise ValueError(
            f"Unsupported quantization range: {quant_min} to {quant_max}. CoreML only supports 4-bit and 8-bit quantization."
        )

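    # Emit the dequantize op as a compile-time constant; for group g = j // group_size
    # it represents output[i, j] = (int_data[i, j] - zero_point[i, g]) * scale[i, g].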
    output = _utils._construct_constexpr_dequant_op(
        int_data.astype(quantized_np_dtype),
        zero_point,
        scale,
        axis=-1,
        name=node.name,
    )
    context.add(output, node.name)
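

# Importing this module is enough to make the ops above available: each
# @register_torch_op call adds its translator to the coremltools torch op
# registry consulted during conversion.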