Commit 066d1e4

Author: Sara Adkins

Offloading Bug Fix (#58)

* fix fstring
* fix offloaded sparsity calculation

1 parent: 0a0a2de
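
The "fix fstring" bullet refers to the error message in calculate_offload_device_map shown in the diff below: the string was missing its f prefix, so the braces were printed literally instead of being filled with the GPU counts. A quick illustration of the difference, with made-up values:

```python
num_gpus, available_gpus = 4, 2

# Without the f prefix, the placeholders are emitted verbatim:
print("Requested {num_gpus} GPUs but only {available_gpus} are available.")
# Requested {num_gpus} GPUs but only {available_gpus} are available.

# With the f prefix, the values are interpolated:
print(f"Requested {num_gpus} GPUs but only {available_gpus} are available.")
# Requested 4 GPUs but only 2 are available.
```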

File tree: 1 file changed (+7, -5 lines)

  • src/llmcompressor/transformers/compression/helpers.py

src/llmcompressor/transformers/compression/helpers.py

Lines changed: 7 additions & 5 deletions
@@ -1,5 +1,5 @@
 from typing import Dict, List, Optional, Union
-
+from accelerate.accelerator import get_state_dict_offloaded_model
 import psutil
 import torch
 from accelerate import infer_auto_device_map, init_empty_weights
@@ -73,10 +73,12 @@ def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]
     structures = {"2:4"}
     for sparsity_structure in structures:
         linear_modules = get_linear_layers(model)
+        offloaded_params = get_state_dict_offloaded_model(model)
+
         linear_modules_with_sparsity_structure = [
-            tensor_follows_mask_structure(layer.weight)
-            for layer in tqdm(
-                linear_modules.values(),
+            tensor_follows_mask_structure(offloaded_params[f"{name}.weight"])
+            for name in tqdm(
+                linear_modules.keys(),
                 desc="Checking whether model follows "
                 f"{sparsity_structure} sparsity structure",
             )
@@ -199,7 +201,7 @@ def calculate_offload_device_map(
     available_gpus = torch.cuda.device_count()
     if available_gpus < num_gpus:
         raise ValueError(
-            "Requested {num_gpus} GPUs but only {available_gpus} are available."
+            f"Requested {num_gpus} GPUs but only {available_gpus} are available."
         )
     max_gpu_memory = [max_gpu_memory] * num_gpus
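
The "fix offloaded sparsity calculation" bullet is the substantive change: when a model's weights are offloaded via accelerate, the tensors attached to the modules are placeholders, so checking layer.weight directly does not see the real values. The patch instead looks each weight up by name in the state dict returned by get_state_dict_offloaded_model. Below is a minimal sketch of that behavior, not part of the patch, using a toy torch.nn.Sequential model and accelerate's cpu_offload purely for illustration:

```python
import torch
from accelerate import cpu_offload
from accelerate.accelerator import get_state_dict_offloaded_model

# Toy stand-in for an offloaded model; a real run would offload a large model
# with a GPU execution device, but CPU is enough to show the effect here.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8))
cpu_offload(model, execution_device=torch.device("cpu"))

# After offloading, the parameter attached to the module is expected to be a
# placeholder on the meta device, i.e. it no longer carries the real data.
print(model[0].weight.device)

# The offloaded state dict still exposes the real tensors keyed by parameter
# name, which is what the patched check indexes with f"{name}.weight".
offloaded_params = get_state_dict_offloaded_model(model)
print(offloaded_params["0.weight"].shape)  # torch.Size([8, 8])
```

This is why the patched list comprehension iterates over linear_modules.keys() and reads offloaded_params[f"{name}.weight"] rather than layer.weight.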

0 comments