We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 39e0547 · commit 6ffd10c (copy full SHA for 6ffd10c)
src/brevitas_examples/llm/main.py
@@ -654,9 +654,11 @@ def quantize_llm(args, extra_args=None):
654
from lm_eval.models.huggingface import HFLM
655
with torch.no_grad(), quant_inference_mode(model, compile=args.compile_eval):
656
model(**calibration_loader[0])
657
+ batch_size = 'auto' if args.few_shot_override_batch_size is None else args.few_shot_override_batch_size
658
659
wrapped_model = HFLM(
- pretrained=model, add_bos_token=True) # need to wrap for LLM eval
660
+ pretrained=model, add_bos_token=True,
661
+ batch_size=batch_size) # need to wrap for LLM eval
662
few_shot_eval_results = evaluator.simple_evaluate(
663
model=wrapped_model,
664
model_args=None,
0 commit comments