Skip to content

Commit cdc19f8

Browse files
Rework HWConfig (#3793)
### Changes
Replace JSON-based configuration loading with a Python-style HW Config.

### Reason for changes
Simplifies modification and maintenance by keeping configuration logic in Python code instead of external JSON files.

### Tests
- https://github.com/openvinotoolkit/nncf/actions/runs/20596514331
- manual/job/post_training_quantization/767/
- https://github.com/openvinotoolkit/nncf/actions/runs/20596794339
1 parent 2c4aaae commit cdc19f8

File tree

14 files changed

+1037
-2311
lines changed

14 files changed

+1037
-2311
lines changed

src/nncf/common/hardware/config.py

Lines changed: 114 additions & 183 deletions
Large diffs are not rendered by default.

src/nncf/common/hardware/configs/cpu.json

Lines changed: 0 additions & 617 deletions
This file was deleted.

src/nncf/common/hardware/configs/gpu.json

Lines changed: 0 additions & 627 deletions
This file was deleted.

src/nncf/common/hardware/configs/npu.json

Lines changed: 0 additions & 740 deletions
This file was deleted.

src/nncf/common/hardware/configs/template.md

Lines changed: 0 additions & 78 deletions
This file was deleted.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) 2025 Intel Corporation
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
from dataclasses import dataclass
13+
from dataclasses import field
14+
from itertools import product
15+
from typing import Any
16+
17+
from nncf.common.quantization.structs import QuantizationScheme
18+
from nncf.common.quantization.structs import QuantizerConfig
19+
from nncf.parameters import StrEnum
20+
21+
SCALES = "scales"
22+
UNIFIED = "unified"
23+
ADJUST_PADDING = "adjust_padding"
24+
25+
26+
class Granularity(StrEnum):
    """
    Granularity options for quantization.
    """

    PER_CHANNEL = "per_channel"
    PER_TENSOR = "per_tensor"
29+
30+
31+
@dataclass(frozen=True, kw_only=True, slots=True)
class QConfigSpace:
    """
    Describes a family of quantizer configurations as per-parameter tuples of allowed values.

    :param bits: Number of bits shared by every generated configuration.
    :param mode: Quantization schemes to enumerate.
    :param granularity: Granularity options to enumerate.
    :param narrow_range: Narrow-range flag values to enumerate.
    :param signedness_to_force: Signedness value passed unchanged to every generated
        configuration; None by default.
    """

    bits: int
    mode: tuple[QuantizationScheme, ...]
    granularity: tuple[Granularity, ...]
    narrow_range: tuple[bool, ...]
    signedness_to_force: bool | None = None

    def get_all_qconfigs(self) -> list[QuantizerConfig]:
        """
        Expand this space into the Cartesian product of mode, granularity and narrow_range.

        Combinations are ordered with mode varying slowest and narrow_range varying fastest.

        :return: A list with one QuantizerConfig per combination.
        """
        return [
            QuantizerConfig(
                num_bits=self.bits,
                mode=scheme,
                per_channel=(level == Granularity.PER_CHANNEL),
                narrow_range=is_narrow,
                signedness_to_force=self.signedness_to_force,
            )
            for scheme, level, is_narrow in product(self.mode, self.granularity, self.narrow_range)
        ]
69+
70+
71+
@dataclass(frozen=True, kw_only=True, slots=True)
class OpDesc:
    """
    Describes the quantization configuration spaces applicable to an operation.

    :param type: The type of the operation.
    :param activations: Configuration spaces for the activations of the operation; empty by default.
    :param weights: Configuration spaces for the weights of the operation; empty by default.
    :param attributes: Additional attributes; a fresh empty dict is created per instance by default.
    """

    type: str
    # Tuples are immutable, so a shared literal default is safe here.
    activations: tuple[QConfigSpace, ...] = ()
    weights: tuple[QConfigSpace, ...] = ()
    # A dict is mutable, so it must keep a per-instance factory.
    attributes: dict[str, Any] = field(default_factory=dict)

src/nncf/common/hardware/opset.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
# See the License for the specific language governing permissions and
1010
# limitations under the License.
1111

12+
from nncf.parameters import StrEnum
1213

13-
class HWConfigOpName:
14+
15+
class HWConfigOpName(StrEnum):
1416
CONVOLUTION = "Convolution"
1517
DEPTHWISECONVOLUTION = "DepthWiseConvolution"
1618
MATMUL = "MatMul"
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Copyright (c) 2025 Intel Corporation
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
13+
from nncf.common.hardware.defines import Granularity
14+
from nncf.common.hardware.defines import QConfigSpace
15+
from nncf.common.quantization.structs import QuantizationScheme
16+
17+
# Option tuples shared by the configuration spaces below.
# Element order inside each tuple is significant: it fixes the order in which
# QConfigSpace.get_all_qconfigs() enumerates combinations.
_SYM = (QuantizationScheme.SYMMETRIC,)
_ASYM = (QuantizationScheme.ASYMMETRIC,)
_SYM_OR_ASYM = (QuantizationScheme.SYMMETRIC, QuantizationScheme.ASYMMETRIC)

_PER_TENSOR = (Granularity.PER_TENSOR,)
_PER_CHANNEL = (Granularity.PER_CHANNEL,)
_PER_CHANNEL_OR_TENSOR = (Granularity.PER_CHANNEL, Granularity.PER_TENSOR)

_NARROW_RANGE_ON = (True,)
_NARROW_RANGE_OFF = (False,)
_NARROW_RANGE_ANY = (True, False)

# 8-bit configuration spaces.
q8_a_sym = QConfigSpace(bits=8, mode=_SYM, granularity=_PER_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q8_a = QConfigSpace(bits=8, mode=_SYM_OR_ASYM, granularity=_PER_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q8_a_ch = QConfigSpace(bits=8, mode=_SYM_OR_ASYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q8_w_sym = QConfigSpace(
    bits=8,
    mode=_SYM,
    granularity=_PER_CHANNEL_OR_TENSOR,
    narrow_range=_NARROW_RANGE_ON,
    signedness_to_force=True,
)
q8_w_sym_any_nr = QConfigSpace(
    bits=8,
    mode=_SYM,
    granularity=_PER_CHANNEL_OR_TENSOR,
    narrow_range=_NARROW_RANGE_ANY,
    signedness_to_force=True,
)
q8_w_asym = QConfigSpace(bits=8, mode=_ASYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)

# 16-bit configuration spaces.
q16_a_sym = QConfigSpace(bits=16, mode=_SYM, granularity=_PER_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q16_a = QConfigSpace(bits=16, mode=_SYM_OR_ASYM, granularity=_PER_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q16_a_ch = QConfigSpace(bits=16, mode=_SYM_OR_ASYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q16_w_sym = QConfigSpace(
    bits=16,
    mode=_SYM,
    granularity=_PER_CHANNEL_OR_TENSOR,
    narrow_range=_NARROW_RANGE_ON,
    signedness_to_force=True,
)
q16_w_sym_any_nr = QConfigSpace(
    bits=16,
    mode=_SYM,
    granularity=_PER_CHANNEL_OR_TENSOR,
    narrow_range=_NARROW_RANGE_ANY,
    signedness_to_force=True,
)
q16_w_asym = QConfigSpace(bits=16, mode=_ASYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)

# 4-bit configuration spaces.
q4_tn = QConfigSpace(bits=4, mode=_SYM, granularity=_PER_TENSOR, narrow_range=_NARROW_RANGE_OFF)
q4_ch = QConfigSpace(bits=4, mode=_SYM, granularity=_PER_CHANNEL, narrow_range=_NARROW_RANGE_OFF)
q4_w = QConfigSpace(bits=4, mode=_SYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)

# 2-bit configuration spaces.
q2_ch = QConfigSpace(bits=2, mode=_SYM, granularity=_PER_CHANNEL, narrow_range=_NARROW_RANGE_OFF)
q2_w = QConfigSpace(bits=2, mode=_SYM, granularity=_PER_CHANNEL_OR_TENSOR, narrow_range=_NARROW_RANGE_OFF)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (c) 2025 Intel Corporation
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.

0 commit comments

Comments
 (0)