
Commit 4dbda87

WOQ: set default act_quant_mode to PER_BATCH_IC_BLOCK_SYM (#3321)
1 parent 1c51f3f commit 4dbda87

File tree

7 files changed: +71 -12 lines changed

examples/cpu/llm/inference/distributed/run_accuracy_with_deepspeed.py

Lines changed: 27 additions & 2 deletions
@@ -145,8 +145,17 @@ def decorator(func):
 )
 parser.add_argument(
     "--act-quant-mode",
-    choices=["PER_TENSOR", "PER_IC_BLOCK", "PER_BATCH", "PER_BATCH_IC_BLOCK"],
-    default="PER_IC_BLOCK",
+    choices=[
+        "PER_TENSOR",
+        "PER_IC_BLOCK",
+        "PER_BATCH",
+        "PER_BATCH_IC_BLOCK",
+        "PER_TENSOR_SYM",
+        "PER_IC_BLOCK_SYM",
+        "PER_BATCH_SYM",
+        "PER_BATCH_IC_BLOCK_SYM",
+    ],
+    default="PER_BATCH_IC_BLOCK_SYM",
     type=str,
     help="Quantization mode for activation with different granularity. "
     "For lowp-mode=INT8 only. For other cases, it has no effect. "
@@ -155,6 +164,10 @@ def decorator(func):
     "PER_IC_BLOCK(1): quantize per group along IC with group size = IC_BLOCK; "
     "PER_BATCH(2): quantize per batch; "
     "PER_BATCH_IC_BLOCK(3): quantize per block of size 1 x IC_BLOCK. "
+    "PER_TENSOR_SYM(4): symmetrically quantize per tensor; "
+    "PER_IC_BLOCK_SYM(5): symmetrically quantize per group along IC with group size = IC_BLOCK; "
+    "PER_BATCH_SYM(6): symmetrically quantize per batch; "
+    "PER_BATCH_IC_BLOCK_SYM(7): symmetrically quantize per block of size 1 x IC_BLOCK. "
     "IC_BLOCK is determined by IC automatically.",
 )
 parser.add_argument(
@@ -417,6 +430,10 @@ def write_checkpoints_json():
     "PER_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK,
     "PER_BATCH": ipex.quantization.WoqActQuantMode.PER_BATCH,
     "PER_BATCH_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK,
+    "PER_TENSOR_SYM": ipex.quantization.WoqActQuantMode.PER_TENSOR_SYM,
+    "PER_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK_SYM,
+    "PER_BATCH_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_SYM,
+    "PER_BATCH_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
 }
 weight_qscheme = (
     WoqWeightQScheme.SYMMETRIC
@@ -1196,6 +1213,10 @@ def write_checkpoints_json():
     "PER_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK,
     "PER_BATCH": ipex.quantization.WoqActQuantMode.PER_BATCH,
     "PER_BATCH_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK,
+    "PER_TENSOR_SYM": ipex.quantization.WoqActQuantMode.PER_TENSOR_SYM,
+    "PER_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK_SYM,
+    "PER_BATCH_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_SYM,
+    "PER_BATCH_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
 }
 weight_qscheme = (
     WoqWeightQScheme.SYMMETRIC
@@ -1849,6 +1870,10 @@ def write_checkpoints_json():
     "PER_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK,
     "PER_BATCH": ipex.quantization.WoqActQuantMode.PER_BATCH,
     "PER_BATCH_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK,
+    "PER_TENSOR_SYM": ipex.quantization.WoqActQuantMode.PER_TENSOR_SYM,
+    "PER_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK_SYM,
+    "PER_BATCH_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_SYM,
+    "PER_BATCH_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
 }
 weight_qscheme = (
     WoqWeightQScheme.SYMMETRIC
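
The help text above describes the activation quantization granularities used when lowp-mode=INT8. Below is a minimal conceptual sketch of the new default, PER_BATCH_IC_BLOCK_SYM, on a [batch, IC] activation: one symmetric int8 scale per 1 x IC_BLOCK block, zero point fixed at 0. The function name, the ic_block value, and the int8 clamping range are illustrative assumptions only; IPEX determines IC_BLOCK from IC automatically and its kernels do not take this form.

# Conceptual sketch of PER_BATCH_IC_BLOCK_SYM (illustrative, not the IPEX kernel):
# split each batch row into blocks of IC_BLOCK input channels and give every
# block its own symmetric int8 scale (zero point is always 0).
import torch


def quantize_per_batch_ic_block_sym(x: torch.Tensor, ic_block: int = 64):
    # x: [batch, IC] activation; ic_block is an assumed block size here,
    # whereas IPEX derives the real IC_BLOCK from IC automatically.
    batch, ic = x.shape
    assert ic % ic_block == 0, "sketch assumes IC is a multiple of ic_block"
    blocks = x.reshape(batch, ic // ic_block, ic_block)
    # Symmetric quantization: scale from the per-block absolute maximum.
    scales = blocks.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 127.0
    q = torch.clamp(torch.round(blocks / scales), -127, 127).to(torch.int8)
    return q.reshape(batch, ic), scales.squeeze(-1)  # scales: [batch, IC // ic_block]


x = torch.randn(4, 256)
q, scales = quantize_per_batch_ic_block_sym(x)
print(q.shape, scales.shape)  # torch.Size([4, 256]) torch.Size([4, 4])

By contrast, PER_BATCH_SYM would use one scale per row and PER_TENSOR_SYM a single scale for the whole tensor; the asymmetric variants additionally carry a zero point alongside each scale.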

examples/cpu/llm/inference/distributed/run_generation_tp.py

Lines changed: 19 additions & 2 deletions
@@ -146,8 +146,17 @@
 )
 parser.add_argument(
     "--act-quant-mode",
-    choices=["PER_TENSOR", "PER_IC_BLOCK", "PER_BATCH", "PER_BATCH_IC_BLOCK"],
-    default="PER_IC_BLOCK",
+    choices=[
+        "PER_TENSOR",
+        "PER_IC_BLOCK",
+        "PER_BATCH",
+        "PER_BATCH_IC_BLOCK",
+        "PER_TENSOR_SYM",
+        "PER_IC_BLOCK_SYM",
+        "PER_BATCH_SYM",
+        "PER_BATCH_IC_BLOCK_SYM",
+    ],
+    default="PER_BATCH_IC_BLOCK_SYM",
     type=str,
     help="Quantization mode for activation with different granularity. "
     "For lowp-mode=INT8 only. For other cases, it has no effect. "
@@ -156,6 +165,10 @@
     "PER_IC_BLOCK(1): quantize per group along IC with group size = IC_BLOCK; "
     "PER_BATCH(2): quantize per batch; "
     "PER_BATCH_IC_BLOCK(3): quantize per block of size 1 x IC_BLOCK. "
+    "PER_TENSOR_SYM(4): symmetrically quantize per tensor; "
+    "PER_IC_BLOCK_SYM(5): symmetrically quantize per group along IC with group size = IC_BLOCK; "
+    "PER_BATCH_SYM(6): symmetrically quantize per batch; "
+    "PER_BATCH_IC_BLOCK_SYM(7): symmetrically quantize per block of size 1 x IC_BLOCK. "
     "IC_BLOCK is determined by IC automatically.",
 )
 parser.add_argument(
@@ -339,6 +352,10 @@ def trace_handler(prof):
     "PER_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK,
     "PER_BATCH": ipex.quantization.WoqActQuantMode.PER_BATCH,
     "PER_BATCH_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK,
+    "PER_TENSOR_SYM": ipex.quantization.WoqActQuantMode.PER_TENSOR_SYM,
+    "PER_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK_SYM,
+    "PER_BATCH_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_SYM,
+    "PER_BATCH_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
 }
 weight_qscheme = (
     WoqWeightQScheme.SYMMETRIC

examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py

Lines changed: 19 additions & 2 deletions
@@ -156,8 +156,17 @@
 )
 parser.add_argument(
     "--act-quant-mode",
-    choices=["PER_TENSOR", "PER_IC_BLOCK", "PER_BATCH", "PER_BATCH_IC_BLOCK"],
-    default="PER_IC_BLOCK",
+    choices=[
+        "PER_TENSOR",
+        "PER_IC_BLOCK",
+        "PER_BATCH",
+        "PER_BATCH_IC_BLOCK",
+        "PER_TENSOR_SYM",
+        "PER_IC_BLOCK_SYM",
+        "PER_BATCH_SYM",
+        "PER_BATCH_IC_BLOCK_SYM",
+    ],
+    default="PER_BATCH_IC_BLOCK_SYM",
     type=str,
     help="Quantization mode for activation with different granularity. "
     "For lowp-mode=INT8 only. For other cases, it has no effect. "
@@ -166,6 +175,10 @@
     "PER_IC_BLOCK(1): quantize per group along IC with group size = IC_BLOCK; "
     "PER_BATCH(2): quantize per batch; "
     "PER_BATCH_IC_BLOCK(3): quantize per block of size 1 x IC_BLOCK. "
+    "PER_TENSOR_SYM(4): symmetrically quantize per tensor; "
+    "PER_IC_BLOCK_SYM(5): symmetrically quantize per group along IC with group size = IC_BLOCK; "
+    "PER_BATCH_SYM(6): symmetrically quantize per batch; "
+    "PER_BATCH_IC_BLOCK_SYM(7): symmetrically quantize per block of size 1 x IC_BLOCK. "
     "IC_BLOCK is determined by IC automatically.",
 )
 parser.add_argument(
@@ -489,6 +502,10 @@ def write_checkpoints_json():
     "PER_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK,
     "PER_BATCH": ipex.quantization.WoqActQuantMode.PER_BATCH,
     "PER_BATCH_IC_BLOCK": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK,
+    "PER_TENSOR_SYM": ipex.quantization.WoqActQuantMode.PER_TENSOR_SYM,
+    "PER_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_IC_BLOCK_SYM,
+    "PER_BATCH_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_SYM,
+    "PER_BATCH_IC_BLOCK_SYM": ipex.quantization.WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
 }
 weight_qscheme = (
     WoqWeightQScheme.SYMMETRIC

examples/cpu/llm/inference/run.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
             "PER_BATCH_SYM",
             "PER_BATCH_IC_BLOCK_SYM",
         ],
-        default="PER_IC_BLOCK",
+        default="PER_BATCH_IC_BLOCK_SYM",
         type=str,
         help="Quantization mode for activation with different granularity. "
         "For lowp-mode=INT8 only. For other cases, it has no effect. "

examples/cpu/llm/inference/single_instance/run_quantization.py

Lines changed: 1 addition & 1 deletion
@@ -230,7 +230,7 @@
         "PER_BATCH_SYM",
         "PER_BATCH_IC_BLOCK_SYM",
     ],
-    default="PER_IC_BLOCK",
+    default="PER_BATCH_IC_BLOCK_SYM",
     type=str,
     help="Quantization mode for activation with different granularity. "
     "For lowp-mode=INT8 only. For other cases, it has no effect. "

intel_extension_for_pytorch/quantization/_qconfig.py

Lines changed: 3 additions & 3 deletions
@@ -188,7 +188,7 @@ def get_weight_only_quant_qconfig_mapping(
     *,
     weight_dtype: int = WoqWeightDtype.INT8,
     lowp_mode: int = WoqLowpMode.NONE,
-    act_quant_mode: int = WoqActQuantMode.PER_IC_BLOCK,
+    act_quant_mode: int = WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
     group_size: int = -1,
     weight_qscheme: int = WoqWeightQScheme.UNDEFINED,
 ):
@@ -222,8 +222,8 @@ def get_weight_only_quant_qconfig_mapping(
             No grouping along IC for weight. For activation,
             IC_BLOCK is determined automatically by IC.
         If group_size > 0:
-            act_quant_mode can be any. If act_quant_mode is PER_IC_BLOCK
-            or PER_BATCH_IC_BLOCK, weight is grouped along IC by group_size.
+            act_quant_mode can be any. If act_quant_mode is PER_IC_BLOCK(_SYM)
+            or PER_BATCH_IC_BLOCK(_SYM), weight is grouped along IC by group_size.
             The IC_BLOCK for activation is determined by group_size automatically.
             Each group has its own quantization parameters.
     weight_qscheme: Specify how to quantize weight, asymmetrically or symmetrically. Generally,
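
For reference, a minimal usage sketch of get_weight_only_quant_qconfig_mapping after this change. It assumes the Woq* enums are importable from intel_extension_for_pytorch.quantization (the example scripts above reach them via ipex.quantization) and that WoqLowpMode.INT8 selects the lowp-mode=INT8 path mentioned in the help text; only the qconfig construction is shown, not the subsequent model optimization.

# Sketch: with the new default, the two mappings below are equivalent.
import intel_extension_for_pytorch as ipex
from intel_extension_for_pytorch.quantization import (
    WoqActQuantMode,
    WoqLowpMode,
    WoqWeightDtype,
)

# Relies on the new default act_quant_mode (PER_BATCH_IC_BLOCK_SYM).
qconfig_default = ipex.quantization.get_weight_only_quant_qconfig_mapping(
    weight_dtype=WoqWeightDtype.INT8,
    lowp_mode=WoqLowpMode.INT8,  # act_quant_mode only takes effect for lowp-mode=INT8
)

# Spells the default out explicitly.
qconfig_explicit = ipex.quantization.get_weight_only_quant_qconfig_mapping(
    weight_dtype=WoqWeightDtype.INT8,
    lowp_mode=WoqLowpMode.INT8,
    act_quant_mode=WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
)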

tests/cpu/test_quantization_default_recipe.py

Lines changed: 1 addition & 1 deletion
@@ -2155,7 +2155,7 @@ def test(feature, has_bias, w_dtype, lowp_mode, enable_amp):
                 None,
                 group_size,
                 lowp_mode,
-                WoqActQuantMode.PER_IC_BLOCK,
+                WoqActQuantMode.PER_BATCH_IC_BLOCK_SYM,
                 compensation,
             )
             torch.testing.assert_close(output, output_ref)
