File tree Expand file tree Collapse file tree 1 file changed +7
-5
lines changed
src/llmcompressor/transformers/compression Expand file tree Collapse file tree 1 file changed +7
-5
lines changed Original file line number Diff line number Diff line change 11from typing import Dict , List , Optional , Union
2-
2+ from accelerate . accelerator import get_state_dict_offloaded_model
33import psutil
44import torch
55from accelerate import infer_auto_device_map , init_empty_weights
@@ -73,10 +73,12 @@ def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]
7373 structures = {"2:4" }
7474 for sparsity_structure in structures :
7575 linear_modules = get_linear_layers (model )
76+ offloaded_params = get_state_dict_offloaded_model (model )
77+
7678 linear_modules_with_sparsity_structure = [
77- tensor_follows_mask_structure (layer .weight )
78- for layer in tqdm (
79- linear_modules .values (),
79+ tensor_follows_mask_structure(offloaded_params[f"{name}.weight"])
80+ for name in tqdm (
81+ linear_modules .keys (),
8082 desc = "Checking whether model follows "
8183 f"{ sparsity_structure } sparsity structure" ,
8284 )
@@ -199,7 +201,7 @@ def calculate_offload_device_map(
199201 available_gpus = torch .cuda .device_count ()
200202 if available_gpus < num_gpus :
201203 raise ValueError (
202- "Requested {num_gpus} GPUs but only {available_gpus} are available."
204+ f"Requested {num_gpus} GPUs but only {available_gpus} are available."
203205 )
204206 max_gpu_memory = [max_gpu_memory ] * num_gpus
205207
You can’t perform that action at this time.
0 commit comments