Commit 96e517f

Merge remote-tracking branch 'origin' into kylesayrs/autowrap-support-gemma3n

2 parents: 99f9b71 + 0a20392

File tree

5 files changed (+110, -42 lines)

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 0 additions & 31 deletions
This file was deleted.

New bug report issue form

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
name: 🐛 Bug report
description: Raise an issue here if you find a bug.
labels: bug
title: "[Bug]: "

body:
- type: markdown
  attributes:
    value: >
      #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/llm-compressor/issues?q=is%3Aissue+sort%3Acreated-desc+).

      #### ⚠️ For any issues related to vLLM which are not related to quantization or compressed models, please create an issue in [vllm-project/vllm](https://github.com/vllm-project/vllm/issues).
- type: textarea
  attributes:
    label: ⚙️ Your current environment
    description: |
      Please run the following and paste the output below.
      ```bash
      wget https://raw.githubusercontent.com/vllm-project/llm-compressor/main/tools/collect_env.py
      # For security purposes, please feel free to check the contents of collect_env.py before running it.
      python collect_env.py
      ```
    value: |
      <details>
      <summary>The output of <code>python collect_env.py</code></summary>

      ```text
      Your output of `python collect_env.py` here
      ```

      </details>
  validations:
    required: true
- type: textarea
  attributes:
    label: 🐛 Describe the bug
    description: |
      Please provide a clear and concise description of what the bug is.
  validations:
    required: true
- type: textarea
  attributes:
    label: 🛠️ Steps to reproduce
    description: |
      If applicable, please describe any steps required to reproduce. If you can share an applicable huggingface model stub, please do so here.
  validations:
    required: false

src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
     ModelCompressor,
     SparsityCompressionConfig,
     delete_offload_parameter,
-    is_module_offloaded,
+    has_offloaded_params,
     register_offload_parameter,
 )
 from loguru import logger
@@ -138,7 +138,7 @@ def untie_word_embeddings(model: PreTrainedModel):
             continue

         # this could be replaced by a `get_offloaded_parameter` util
-        if not is_module_offloaded(module):
+        if not has_offloaded_params(module):
             untied_data = module.weight.data.clone()
         else:
             untied_data = module._hf_hook.weights_map["weight"].clone()
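
For context, `has_offloaded_params` is the newer compressed-tensors name for the check this hunk performs: whether a module's weights live in an accelerate offload map rather than on the module itself. A minimal sketch of the branching pattern above, assuming `has_offloaded_params` is importable from `compressed_tensors` like the neighbouring utilities, and with `clone_module_weight` as a hypothetical helper name:

```python
from compressed_tensors import has_offloaded_params


def clone_module_weight(module):
    # Mirrors untie_word_embeddings: read the weight directly when it is
    # materialized on the module, otherwise pull it from the offload map
    # kept by the module's accelerate hook.
    if not has_offloaded_params(module):
        return module.weight.data.clone()
    return module._hf_hook.weights_map["weight"].clone()
```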

tests/llmcompressor/transformers/gptq/test_oneshot.py

Lines changed: 10 additions & 9 deletions
@@ -20,14 +20,15 @@
                 type: "int"
                 symmetric: true
                 strategy: "channel"
-            targets: ["Linear"]
+            targets: ["re:.*model.layers.2.self_attn.q_proj$"]
 """

 recipe_modifier_full = GPTQModifier(
     ignore=["lm_head"],
     config_groups={
         "group_0": QuantizationScheme(
-            targets=["Linear"], weights=QuantizationArgs(num_bits=4, strategy="channel")
+            targets=["re:.*model.layers.2.self_attn.q_proj$"],
+            weights=QuantizationArgs(num_bits=4, strategy="channel"),
         )
     },
 )
@@ -36,18 +37,18 @@
     ignore=["lm_head"],
     config_groups={
         "group_0": QuantizationScheme(
-            targets=["Linear"],
+            targets=["re:.*model.layers.2.self_attn.q_proj$"],
             weights=QuantizationArgs(num_bits=4, strategy="group", group_size=128),
         )
     },
 )

 recipe_modifier_shorthand_a = GPTQModifier(
-    ignore=["lm_head"], targets="Linear", scheme="W4A16"
+    ignore=["lm_head"], targets="re:.*model.layers.2.self_attn.q_proj$", scheme="W4A16"
 )

 recipe_modifier_shorthand_b = GPTQModifier(
-    ignore=["lm_head"], scheme={"W4A16": ["Linear"]}
+    ignore=["lm_head"], scheme={"W4A16": ["re:.*model.layers.2.self_attn.q_proj$"]}
 )

@@ -65,7 +66,7 @@ def setUp(self):
         import torch

         self.output = "./oneshot_output"
-        self.model = "Xenova/llama2.c-stories110M"
+        self.model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
         self.dataset = "open_platypus"
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

@@ -95,17 +96,17 @@ def test_oneshot_application(self):
         assert quantization_config is not None

         # check config is set properly
-        assert quantization_config.ignore == ["lm_head"]
+        assert "lm_head" in quantization_config.ignore
         assert len(quantization_config.config_groups) == 1
         quant_scheme = quantization_config.config_groups["group_0"]
         assert isinstance(quant_scheme, QuantizationScheme)
-        assert quant_scheme.targets == ["Linear"]
+        assert quant_scheme.targets == ["re:.*model.layers.2.self_attn.q_proj$"]
         weight_args = quantization_config.config_groups["group_0"].weights
         assert isinstance(weight_args, QuantizationArgs)
         assert weight_args.num_bits == 4

         # Check a specific layer is quantized
-        targetted_linear_layer = model_loaded.model.layers[0].self_attn.k_proj
+        targetted_linear_layer = model_loaded.model.layers[2].self_attn.q_proj
         assert hasattr(targetted_linear_layer, "quantization_scheme")

         # Check lm-head is not quantized
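
The tests now target a single projection with a `re:`-prefixed pattern instead of every `Linear` module, which is why the assertion moves to `model.layers[2].self_attn.q_proj`. A small sketch of how such a pattern discriminates module names, assuming the common convention that the `re:` prefix is stripped and the remainder is applied as a Python regular expression (the candidate names below are illustrative):

```python
import re

# Target string from the recipe with the "re:" prefix removed.
pattern = ".*model.layers.2.self_attn.q_proj$"

candidates = [
    "model.layers.2.self_attn.q_proj",   # matches: the layer the test asserts is quantized
    "model.layers.0.self_attn.k_proj",   # no match: different layer and projection
    "model.layers.12.self_attn.q_proj",  # no match: "12" breaks the required suffix
]

for name in candidates:
    print(f"{name}: {bool(re.match(pattern, name))}")
```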

tools/collect_env.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
"""
Script used to generate environment information for the purpose of
creating bug reports. See `.github/ISSUE_TEMPLATE/bug_report.md`
"""

import platform
import sys
import importlib.metadata

def get_version(pkg_name):
    try:
        return importlib.metadata.version(pkg_name)
    except importlib.metadata.PackageNotFoundError:
        return "None"

def get_torch_hardware_info():
    try:
        import torch
        cuda_devices = []
        amd_devices = []
        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                name = torch.cuda.get_device_name(i)
                if "AMD" in name.upper():
                    amd_devices.append(name)
                else:
                    cuda_devices.append(name)
        return cuda_devices, amd_devices
    except ImportError:
        return [], []

def collect_environment_info():
    cuda_devices, amd_devices = get_torch_hardware_info()

    info = {
        "Operating System": platform.platform(),
        "Python Version": sys.version.replace("\n", " "),
        "llm-compressor Version": get_version("llmcompressor"),
        "compressed-tensors Version": get_version("compressed_tensors"),
        "transformers Version": get_version("transformers"),
        "torch Version": get_version("torch"),
        "CUDA Devices": cuda_devices if cuda_devices else "None",
        "AMD Devices": amd_devices if amd_devices else "None",
    }

    print("### Environment Information ###")
    for key, value in info.items():
        print(f"{key}: `{value}`")

if __name__ == "__main__":
    collect_environment_info()
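
Running the script directly, as the issue template instructs, prints the environment report to stdout. For completeness, a minimal sketch of invoking it from Python rather than the shell, assuming `collect_env.py` has already been downloaded into the current working directory (the path is an assumption):

```python
import runpy

# Execute collect_env.py as if it were run with `python collect_env.py`;
# it prints the "### Environment Information ###" report shown above.
runpy.run_path("collect_env.py", run_name="__main__")
```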
