Commit 588d1e5

[AutoRound] Update autoround to the release version (#2062)
SUMMARY:
- Pin auto-round to 0.9.1
- Minor fix for `quantize_block`
- Expose `batch_size` to users

TEST PLAN:
```
pytest -svv ./tests/llmcompressor/transformers/autoround/test_autoround_oneshot.py
```

cc @hshen14 @thuang6 @chensuyue

---------

Signed-off-by: yiliu30 <[email protected]>
1 parent fb41f25 commit 588d1e5

2 files changed: +4 additions, −3 deletions


setup.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -144,8 +144,7 @@ def localversion_func(version: ScmVersion) -> str:
             if BUILD_TYPE == "release"
             else "compressed-tensors>=0.12.3a2"
         ),
-        # TODO: replace it with the release version
-        ("auto_round @ git+https://github.com/intel/auto-round.git@llmc"),
+        ("auto-round==0.9.1"),
     ],
     extras_require={
         "dev": [
```

src/llmcompressor/modifiers/autoround/base.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -107,6 +107,7 @@ class AutoRoundModifier(Modifier, QuantizationMixin):
     # AutoRound modifier arguments
     iters: int = 200
     enable_torch_compile: bool = True
+    batch_size: int = 8

     # private variables
     _module_names: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
@@ -223,6 +224,7 @@ def apply_autoround(self, state, subgraph):
             scheme=ar_quant_scheme,
             iters=self.iters,
             enable_torch_compile=self.enable_torch_compile,
+            batch_size=self.batch_size,
         )
         # TODO: configure layer-wise config based on self.resolved_config
         ar.configure_layer_config(enable_gguf_official_mixed=False)
@@ -236,7 +238,7 @@ def apply_autoround(self, state, subgraph):
             block=decoding_layer,
             inputs=cur_inputs,
             q_input=self._q_input,
-            device=device,
+            device=str(device),
             # Leave offload for LLMC
             auto_offload=False,
         )
```
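
Since `batch_size` is now a public field on `AutoRoundModifier` (defaulting to 8), users can set it next to `iters` and `enable_torch_compile` when building a recipe. The sketch below is illustrative rather than taken from the commit: the import path, the `targets`/`scheme` fields inherited from `QuantizationMixin`, and the model/dataset names are assumptions.

```python
# Illustrative sketch (not from this commit): passing the newly exposed
# batch_size through AutoRoundModifier in a oneshot run.
from llmcompressor import oneshot
from llmcompressor.modifiers.autoround import AutoRoundModifier  # assumed export path

recipe = AutoRoundModifier(
    targets="Linear",         # assumed QuantizationMixin field
    scheme="W4A16",           # assumed quantization scheme
    iters=200,
    enable_torch_compile=True,
    batch_size=8,             # field exposed to users by this commit
)

oneshot(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder model
    dataset="open_platypus",             # placeholder calibration dataset
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=128,
)
```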
