Commit 78214ae
Merge pull request #165 from bayo-ibm/eval_1gpu
fix: enabling block-by-block evaluation for granite-3.x-models
2 parents: 7777b49 + 3d5b342

File tree

4 files changed: +18, -5 lines

fms_mo/quant/ptq.py

Lines changed: 9 additions & 1 deletion

@@ -2140,14 +2140,22 @@ def get_blocks(model, model_type=None):
         None,
         "lm_head",
     ),
-    "granite": (
+    "granite_gptbigcode": (
         "transformer.h",
         "transformer.wte",
         "transformer.wpe",
         None,
         "transformer.ln_f",
         "lm_head",
     ),
+    "granite": (
+        "model.layers",
+        "model.embed_tokens",
+        "model.rotary_emb",
+        None,
+        "model.norm",
+        "lm_head",
+    ),
     "llama": (
         "model.layers",
         "model.embed_tokens",

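For context: each tuple in the get_blocks lookup table above names, by dotted attribute path, a model's decoder-block container and the modules around it (token embeddings, positional or rotary embeddings, final norm, LM head). Resolving those paths is what lets the evaluator run the model one block at a time. A minimal sketch of the path resolution, assuming only standard attribute access; the helper name resolve_module is hypothetical, not part of this repo:

from functools import reduce

def resolve_module(model, dotted_path):
    # Walk a dotted path such as "model.embed_tokens" down from the
    # top-level module, one getattr per segment.
    return reduce(getattr, dotted_path.split("."), model)

# With the new "granite" entry, the decoder blocks would be fetched as:
# blocks = resolve_module(model, "model.layers")
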
fms_mo/run_quant.py

Lines changed: 0 additions & 1 deletion

@@ -155,7 +155,6 @@ def run_gptq(model_args, data_args, opt_args, gptq_args):
         v2_memory_device="cpu",
     )
 
-
     # Add custom model_type mapping to gptqmodel LUT so GPTQModel can recognize them.
     for mtype, cls in custom_gptq_classes.items():
         if mtype in MODEL_MAP:
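
The comment in this hunk describes registering custom per-architecture classes with GPTQModel's model-type lookup table. One plausible shape of that loop, assuming MODEL_MAP is a plain dict keyed by model_type strings; the body of the if branch is not shown in this diff, so the skip-if-already-known behavior below is an assumption:

# Assumed: MODEL_MAP maps a model_type string to its quantizer class.
for mtype, cls in custom_gptq_classes.items():
    if mtype in MODEL_MAP:
        # Assumption: GPTQModel already knows this architecture; keep it.
        continue
    MODEL_MAP[mtype] = cls  # register the custom class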

fms_mo/training_args.py

Lines changed: 2 additions & 3 deletions

@@ -139,8 +139,8 @@ class OptArguments(TypeChecker):
 
     quant_method: str = field(
         metadata={
-            "choices": ["gptq", "gptqv2", "fp8", "dq"],
-            "help": "Quantization technique"
+            "choices": ["gptq", "gptqv2", "fp8", "dq"],
+            "help": "Quantization technique",
         }
     )
     output_dir: str = field(
@@ -229,7 +229,6 @@ class GPTQArguments(TypeChecker):
     cache_examples_on_gpu: bool = True
 
 
-
 @dataclass
 class FP8Arguments(TypeChecker):
     """Dataclass for FP8 related arguments that will be used by llm-compressor."""

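The substantive edit above is a trailing comma in the metadata dict, but the field illustrates the choices/help metadata convention consumed by HuggingFace-style argument parsing. A minimal sketch, assuming transformers.HfArgumentParser is the consumer (an assumption; fms-mo may wire these dataclasses up differently):

from dataclasses import dataclass, field

from transformers import HfArgumentParser

@dataclass
class OptArgs:
    # "choices" constrains accepted CLI values; "help" becomes usage text.
    quant_method: str = field(
        metadata={
            "choices": ["gptq", "gptqv2", "fp8", "dq"],
            "help": "Quantization technique",
        }
    )

parser = HfArgumentParser(OptArgs)
(opt_args,) = parser.parse_args_into_dataclasses(["--quant_method", "gptq"])
print(opt_args.quant_method)  # -> gptq
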
fms_mo/utils/eval_utils.py

Lines changed: 7 additions & 0 deletions

@@ -98,6 +98,10 @@ def eval_llm_1GPU(qcfg, model, test_dataset, pre_cache_func=None, **kwargs): #
         logger.info("All blocks are computed for evaluation")
 
     nlls = []
+
+    # Required by Granite-3.X (and potentially other models) to scale their logits
+    logits_scaling = getattr(model.config, "logits_scaling", 1)
+
     # for i, data_mb in enumerate(dloader): #if using dloader.
     for i in tqdm(range(qcfg["n_samples"]), desc="Final Evaluating..."):
         hidden_states = qcfg["cached_input"][i].to(dev)
@@ -107,6 +111,9 @@ def eval_llm_1GPU(qcfg, model, test_dataset, pre_cache_func=None, **kwargs): #
         lm_head.to(dev)
         lm_logits = lm_head(hidden_states)
 
+        # Scaling of the lm_head outputs to obtain the correct logits
+        lm_logits /= logits_scaling
+
         # Shift so that tokens < n predict n
         shift_logits = lm_logits[:, :-1, :].contiguous().float()
         shift_labels = test_dataset.input_ids[:, (i * seq_len) : ((i + 1) * seq_len)][
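
Why the new lines matter: Granite-3.x checkpoints store a logits_scaling factor in their config, and the model divides the lm_head output by it during a normal forward pass. Since this block-by-block evaluator applies lm_head manually, it must repeat that division or the cross-entropy, and hence the perplexity, comes out wrong; getattr with a default of 1 leaves other models unaffected. A self-contained sketch of the surrounding perplexity computation under those assumptions (the function name and argument layout are illustrative, not the repo's API):

import torch
import torch.nn as nn

def perplexity_1gpu(lm_head, hidden_list, labels_list, seq_len, logits_scaling=1.0):
    # hidden_list[i]: final hidden states for sample i, shape (1, seq_len, d).
    # labels_list[i]: the matching input_ids slice, shape (1, seq_len).
    loss_fct = nn.CrossEntropyLoss()
    nlls = []
    for hidden_states, labels in zip(hidden_list, labels_list):
        # Undo Granite's logits scaling; a factor of 1.0 is a no-op.
        lm_logits = lm_head(hidden_states) / logits_scaling
        # Shift so that tokens < n predict token n.
        shift_logits = lm_logits[:, :-1, :].contiguous().float()
        shift_labels = labels[:, 1:].contiguous()
        loss = loss_fct(
            shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)
        )
        nlls.append(loss * seq_len)
    return torch.exp(torch.stack(nlls).sum() / (len(nlls) * seq_len))

This follows the standard GPTQ-style perplexity recipe: each sample's mean NLL is re-weighted by sequence length, summed, and normalized by total token count before exponentiating.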
