Skip to content

Commit bd7f4d5

Browse files
committed
Merge branch 'kylesayrs/fuse-helpers' into bdellabe/transform-modifier
2 parents 3c216dd + bbcdc8c commit bd7f4d5

File tree

36 files changed

+222
-155
lines changed

36 files changed

+222
-155
lines changed

examples/big_models_with_sequential_onloading/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ The Llama 3.3 70b is larger than 80 GB, surpassing the size of 1 A100. However,
1818

1919
```python
2020
model_id = "meta-llama/Llama-3.3-70B-Instruct"
21-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
21+
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None)
2222
```
2323

2424
The model is first loaded onto the `cpu`, as indicated through the use of `None` for the `device_map` argument in the `from_pretrained` method when loading the model.
@@ -42,4 +42,4 @@ output = model.generate(**sample, max_new_tokens=100)
4242
print(tokenizer.decode(output[0]))
4343
```
4444

45-
Finally, we call `dispatch_for_generation` to evenly load the model across available devices (potentially offloading the model if required) and run sample generations on the newly quantized model.
45+
Finally, we call `dispatch_for_generation` to evenly load the model across available devices (potentially offloading the model if required) and run sample generations on the newly quantized model.

examples/big_models_with_sequential_onloading/llama3.3_70b.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88

99
# Select model and load it.
1010
model_id = "meta-llama/Llama-3.3-70B-Instruct"
11-
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
11+
model = AutoModelForCausalLM.from_pretrained(
12+
model_id,
13+
torch_dtype="auto",
14+
device_map=None,
15+
)
1216
tokenizer = AutoTokenizer.from_pretrained(model_id)
1317

1418
# Select calibration dataset.

examples/finetuning/example_alternating_recipe.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ initial_sparsity_stage:
44
SparseGPTModifier:
55
sparsity: 0.5
66
block_size: 128
7-
percdamp: 0.01
7+
dampening_frac: 0.01
88
mask_structure: "0:0"
99
targets: ["Linear"]
1010
ignore: ["re:.*lm_head"]
@@ -20,7 +20,7 @@ next_sparsity_stage:
2020
SparseGPTModifier:
2121
sparsity: 0.7
2222
block_size: 128
23-
percdamp: 0.01
23+
dampening_frac: 0.01
2424
mask_structure: "0:0"
2525
targets: ["Linear"]
2626
ignore: ["re:.*lm_head"]

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str:
119119
"tqdm>=4.0.0",
120120
# torch 1.10 and 1.11 do not support quantized onnx export
121121
"torch>=1.7.0,!=1.10,!=1.11",
122-
"transformers>4.0,<4.53.0",
122+
"transformers>4.0",
123123
"datasets",
124124
"accelerate>=0.20.3,!=1.1.0",
125125
"pynvml",
@@ -146,6 +146,7 @@ def localversion_func(version: ScmVersion) -> str:
146146
"torchvision",
147147
"librosa",
148148
"soundfile",
149+
"torchcodec",
149150
# linting, formatting, and type checking
150151
"black~=24.4.2",
151152
"isort~=5.13.2",

src/llmcompressor/args/dataset_arguments.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,6 @@ class DatasetArguments(CustomDatasetArguments):
162162
),
163163
},
164164
)
165-
trust_remote_code_data: bool = field(
166-
default=False,
167-
metadata={
168-
"help": "Whether or not to allow for datasets defined on the Hub using "
169-
"a dataset script. This option should only be set to True for "
170-
"repositories you trust and in which you have read the code, as it "
171-
"will execute code present on the Hub on your local machine."
172-
},
173-
)
174165
# --- pipeline arguments --- #
175166
pipeline: Optional[str] = field(
176167
default="independent",

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,6 @@ def oneshot(
228228
overwrite_cache: bool = False,
229229
preprocessing_num_workers: Optional[int] = None,
230230
min_tokens_per_module: Optional[float] = None,
231-
trust_remote_code_data: bool = False,
232231
# Miscellaneous arguments
233232
output_dir: Optional[str] = None,
234233
log_dir: Optional[str] = "sparse_logs",
@@ -290,8 +289,6 @@ def oneshot(
290289
preprocessing.
291290
:param min_tokens_per_module: Minimum percentage of tokens per
292291
module, relevant for MoE models.
293-
:param trust_remote_code_data: Whether to allow for datasets defined on the Hub
294-
using a dataset script.
295292
296293
# Miscellaneous arguments
297294
:param output_dir: Path to save the output model after calibration.
@@ -303,8 +300,9 @@ def oneshot(
303300
"""
304301

305302
# pass all args directly into Oneshot
306-
local_args = locals()
307-
local_args.pop("kwargs")
303+
local_args = {
304+
k: v for k, v in locals().items() if k not in ("local_args", "kwargs")
305+
}
308306
one_shot = Oneshot(**local_args, **kwargs)
309307
one_shot()
310308

src/llmcompressor/entrypoints/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def initialize_processor_from_path(
241241
)
242242

243243
except ValueError as exception:
244-
if "trust_remote_code=True" in exception.value:
244+
if any("trust_remote_code=True" in arg for arg in exception.args):
245245
raise ValueError(
246246
f"The repository for {processor_src} contains custom code which must "
247247
"be executed to correctly load the tokenizer/processor. You can "

src/llmcompressor/modeling/fuse.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from typing import Iterable
2+
3+
import torch
4+
from compressed_tensors import update_offload_parameter
5+
6+
__all__ = ["fuse_norm_linears"]
7+
8+
9+
def fuse_norm_linears(norm: torch.nn.Module, linears: Iterable[torch.nn.Linear]):
10+
"""
11+
Fuse a norm layer into subsequent linear layers. This is useful for ensuring
12+
transform invariance between norm and linear layers.
13+
14+
Note that a model cannot be properly trained after its norms have been fused.
15+
16+
:param norm: norm layer whose weight will be fused into subsequent linears
17+
:param linears: linear layers which directly follow the norm layer
18+
"""
19+
if isinstance(norm, torch.nn.RMSNorm):
20+
for linear in linears:
21+
# NOTE: spinquant does this op in float64; no cast is applied here
22+
new_weight = linear.weight * norm.weight
23+
update_offload_parameter(linear, "weight", new_weight)
24+
25+
update_offload_parameter(norm, "weight", torch.ones_like(norm.weight))
26+
27+
else:
28+
raise ValueError(f"Cannot fuse norm of type {type(norm)}")

src/llmcompressor/modifiers/modifier.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from abc import abstractmethod
22
from typing import Optional
33

4+
from pydantic import ConfigDict
5+
46
from llmcompressor.core.events import Event, EventType
57
from llmcompressor.core.state import State
68
from llmcompressor.modifiers.interface import ModifierInterface
@@ -30,6 +32,8 @@ class Modifier(ModifierInterface, HooksMixin):
3032
:param update: The update step for the modifier
3133
"""
3234

35+
model_config = ConfigDict(extra="forbid")
36+
3337
index: Optional[int] = None
3438
group: Optional[str] = None
3539
start: Optional[float] = None

src/llmcompressor/modifiers/smoothquant/base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ def on_initialize(self, state: State, **kwargs) -> bool:
127127
f"Expected start to be None or -1, got {self.end}"
128128
)
129129

130+
if not hasattr(state, "data") or state.data.calib is None:
131+
raise ValueError(
132+
f"{self.__class__.__name__} requires a calibration dataset to be "
133+
"provided"
134+
)
130135
self.ignore = [] if not self.ignore else self.ignore
131136
self.mappings = self._infer_mappings_from_model(state.model)
132137
self.resolved_mappings_ = self._resolve_mappings(state.model)

0 commit comments

Comments
 (0)