Skip to content

Commit 409ceef

Browse files
xin3he and lvliang-intel
authored and committed
refactor eval and add UT (#1324)
Signed-off-by: He, Xin3 <xin3.he@intel.com>
1 parent dd86e35 commit 409ceef

31 files changed

+942
-820
lines changed

auto_round/__init__.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,4 @@
2222

2323
monkey_patch()
2424

25-
26-
def __getattr__(name):
27-
if name == "AutoHfQuantizer":
28-
from auto_round.inference.auto_quantizer import AutoHfQuantizer
29-
30-
return AutoHfQuantizer
31-
if name == "AutoRoundConfig":
32-
from auto_round.inference.auto_quantizer import AutoRoundConfig
33-
34-
return AutoRoundConfig
35-
36-
raise AttributeError(f"auto-round has no attribute '{name}'")
37-
38-
3925
from .version import __version__

auto_round/__main__.py

Lines changed: 20 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
from auto_round.auto_scheme import AutoScheme
1919
from auto_round.compressors import BaseCompressor
20-
from auto_round.eval.eval_cli import EvalArgumentParser, _eval_init, eval, eval_task_by_task
20+
from auto_round.eval.eval_cli import EvalArgumentParser, eval, eval_task_by_task
21+
from auto_round.eval.evaluation import run_model_evaluation
2122
from auto_round.schemes import PRESET_SCHEMES
2223
from auto_round.utils import (
2324
clear_memory,
@@ -383,6 +384,20 @@ def __init__(self, *args, **kwargs):
383384
eval_args.add_argument(
384385
"--eval_task_by_task", action="store_true", help="Evaluate tasks sequentially instead of batching. "
385386
)
387+
eval_args.add_argument(
388+
"--eval_backend",
389+
default="hf",
390+
type=str,
391+
choices=["hf", "vllm"],
392+
help="Backend to use for model evaluation. Use hf backend for evaluation by default.",
393+
)
394+
eval_args.add_argument(
395+
"--vllm_args",
396+
default=None,
397+
type=str,
398+
help="(for vllm) Custom vllm arguments in format: '--arg1=value1,--arg2=value2'. "
399+
"Example: '--tensor_parallel_size=2,--gpu_memory_utilization=0.9'",
400+
)
386401
eval_args.add_argument(
387402
"--eval_model_dtype",
388403
default=None,
@@ -703,185 +718,15 @@ def tune(args):
703718
suffix = f"g{autoround.group_size}"
704719
export_dir = os.path.join(args.output_dir, model_name.split("/")[-1] + f"-w{autoround.bits}{suffix}")
705720

721+
# ======================= Quantize and save model =======================
706722
model, folders = autoround.quantize_and_save(export_dir, format=args.format) # pylint: disable=E1101
707723
tokenizer = autoround.tokenizer # pylint: disable=E1101
708724

709725
model.eval()
710726
clear_memory()
711727

712-
eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")
713-
714-
# diffusion model has different evaluation path
715-
if getattr(autoround, "diffusion", False):
716-
pipe = autoround.pipe
717-
pipe.to(model.dtype)
718-
pipe.transformer = model
719-
device_str = detect_device(device_str)
720-
pipe = pipe.to(device_str)
721-
if pipe.dtype != eval_model_dtype and eval_model_dtype != "auto":
722-
pipe.to(getattr(torch, eval_model_dtype))
723-
724-
gen_kwargs = {
725-
"guidance_scale": args.guidance_scale,
726-
"output_type": "pil",
727-
"num_inference_steps": args.num_inference_steps,
728-
"generator": (
729-
None
730-
if args.generator_seed is None
731-
else torch.Generator(device=pipe.device).manual_seed(args.generator_seed)
732-
),
733-
}
734-
if not os.path.exists(args.image_save_dir):
735-
os.makedirs(args.image_save_dir)
736-
737-
if args.prompt is not None:
738-
outputs = pipe(prompt=args.prompt, **gen_kwargs)
739-
outputs.images[0].save(os.path.join(args.image_save_dir, "img.png"))
740-
logger.info(
741-
f"Image generated with prompt {args.prompt} is saved as {os.path.join(args.image_save_dir, 'img.png')}"
742-
)
743-
744-
if args.prompt_file is not None:
745-
from auto_round.compressors.diffusion import diffusion_eval
746-
747-
metrics = args.metrics.split(",")
748-
diffusion_eval(pipe, args.prompt_file, metrics, args.image_save_dir, 1, gen_kwargs)
749-
return
750-
751-
lm_eval_version = get_library_version("lm-eval")
752-
753-
eval_folder = folders[-1]
754-
if args.tasks is None or args.tasks == "" or eval_folder is None:
755-
return
756-
757-
tasks = args.tasks
758-
if isinstance(tasks, str):
759-
tasks = tasks.split(",")
760-
761-
from lm_eval.utils import make_table # pylint: disable=E0401
762-
763-
logger.info(f"Using lm-eval version {lm_eval_version}")
764-
eval_gguf_model = False
765-
for file in os.listdir(eval_folder):
766-
if file.endswith("gguf"):
767-
eval_gguf_model = True
768-
break
769-
770-
import time
771-
772-
if "llama" in args.model.lower() and not args.add_bos_token:
773-
logger.warning("set add_bos_token=True for llama model.")
774-
args.add_bos_token = True
775-
if (autoround.act_bits <= 8 and formats[-1] == "fake") or eval_gguf_model:
776-
if eval_gguf_model:
777-
# for file in os.listdir(eval_folder):
778-
# gguf_file = file
779-
gguf_file = None
780-
gguf_format = None # Initialize gguf_format to None
781-
# gguf folder only contains one file
782-
for format in formats:
783-
if format.startswith("gguf"):
784-
gguf_format = format.split(":")[-1].upper()
785-
if gguf_format is None: # Validate gguf_format after the loop
786-
logger.error("No valid gguf format found in formats. Please check the input.")
787-
sys.exit(-1)
788-
for file in os.listdir(eval_folder):
789-
if gguf_format in file:
790-
gguf_file = file
791-
792-
logger.warning("evaluating gguf model is an experimental feature, the accuracy may be not correct.")
793-
if eval_model_dtype == "float32" or eval_model_dtype == "auto":
794-
logger.warning(
795-
"set '--eval_model_dtype bf16' can significantly speed up evaluation for gguf model,"
796-
" but may affect accuracy."
797-
)
798-
if gguf_file is None:
799-
logger.error("Cannot find correct gguf file for evaluation, please check.")
800-
sys.exit(-1)
801-
model = AutoModelForCausalLM.from_pretrained(
802-
eval_folder, gguf_file=gguf_file, device_map="auto", torch_dtype=eval_model_dtype
803-
)
804-
model.eval()
805-
tokenizer = AutoTokenizer.from_pretrained(eval_folder, gguf_file=gguf_file)
806-
else:
807-
if hasattr(model, "hf_device_map") and len(model.hf_device_map) > 1:
808-
from accelerate.big_modeling import dispatch_model
809-
810-
dispatch_model(model, model.hf_device_map)
811-
else:
812-
device_str = detect_device(device_str)
813-
model = model.to(device_str)
814-
if model.dtype != eval_model_dtype and eval_model_dtype != "auto":
815-
model.to(getattr(torch, eval_model_dtype))
816-
817-
if args.eval_task_by_task:
818-
eval_task_by_task(
819-
model,
820-
tokenizer=tokenizer,
821-
device=device_str,
822-
tasks=args.tasks,
823-
limit=args.limit,
824-
batch_size=args.eval_bs,
825-
eval_model_dtype=eval_model_dtype,
826-
add_bos_token=args.add_bos_token,
827-
)
828-
else:
829-
if args.eval_bs is None or args.eval_bs == "auto":
830-
logger.warning("This API does not support auto currently, reset eval_bs to 16")
831-
args.eval_bs = 16
832-
from auto_round.eval.evaluation import simple_evaluate_user_model
833-
834-
st = time.time()
835-
836-
res = simple_evaluate_user_model(
837-
model,
838-
tokenizer,
839-
tasks=tasks,
840-
batch_size=args.eval_bs,
841-
limit=args.limit,
842-
device=device_str,
843-
eval_model_dtype=eval_model_dtype,
844-
add_bos_token=args.add_bos_token,
845-
)
846-
print(make_table(res))
847-
print("evaluation running time=%ds" % (time.time() - st))
848-
else:
849-
if args.eval_task_by_task:
850-
eval_task_by_task(
851-
eval_folder,
852-
device=device_str,
853-
tasks=args.tasks,
854-
batch_size=args.eval_bs,
855-
limit=args.limit,
856-
eval_model_dtype=eval_model_dtype,
857-
mllm=autoround.mllm, # pylint: disable=E1101
858-
add_bos_token=args.add_bos_token,
859-
)
860-
else:
861-
from auto_round.eval.evaluation import simple_evaluate
862-
863-
tasks, model_args, device_str = _eval_init(
864-
args.tasks, eval_folder, args.device_map, args.disable_trust_remote_code, dtype=eval_model_dtype
865-
)
866-
st = time.time()
867-
model_args += f",add_bos_token={args.add_bos_token}"
868-
if autoround.mllm: # pylint: disable=E1101
869-
model_type = "hf-multimodal"
870-
if args.eval_bs is None or args.eval_bs == "auto":
871-
logger.warning("hf-multimodal models does not support auto currently, reset eval_bs to 16")
872-
args.eval_bs = 16
873-
else:
874-
model_type = "hf"
875-
res = simple_evaluate(
876-
model=model_type,
877-
model_args=model_args,
878-
tasks=tasks,
879-
device=device_str,
880-
batch_size=args.eval_bs,
881-
limit=args.limit,
882-
)
883-
print(make_table(res))
884-
print("evaluation running time=%ds" % (time.time() - st))
728+
# ======================= Model evaluation =======================
729+
run_model_evaluation(model, tokenizer, autoround, folders, formats, device_str, args)
885730

886731

887732
def setup_eval_parser():
@@ -909,6 +754,7 @@ def run_eval():
909754
eval_task_by_task(
910755
model=args.model,
911756
device=args.device_map,
757+
limit=args.limit,
912758
tasks=args.tasks,
913759
batch_size=args.eval_bs,
914760
trust_remote_code=not args.disable_trust_remote_code,

auto_round/compressors/mllm/eval.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,6 @@ def mllm_eval(
100100
mode: str = "all",
101101
ignore: bool = False,
102102
):
103-
try:
104-
from transformers import AutoRoundConfig
105-
except:
106-
from auto_round.inference.auto_quantizer import AutoHfQuantizer
107-
108103
model = None
109104
if data_store_dir is not None:
110105
if not os.path.exists(data_store_dir):

0 commit comments

Comments
 (0)