"""Gate CI on a comparison between the current and the baseline quantization run.

For each model and for both test modes (``current`` and ``baseline``) this
script reads ``perf_test_<mode>.log`` from a log directory and measures the
size of the ``<mode>`` directory below an output base directory.  It then
compares tuning time, peak RAM, peak VRAM and output size between the two
modes and exits with status 1 when any metric is missing or out of tolerance.

The inputs are produced by the companion ``perf_test.sh`` in the same
directory: it changes into ``/auto-round/.azure-pipelines/scripts/performance``,
runs ``auto-round ... --output_dir "./${test_mode}"`` and pipes the console
output through ``tee -a`` into ``${LOG_DIR}/perf_test_${test_mode}.log``.
Because ``tee -a`` appends, one log file may hold several runs; the parser
therefore always uses the last occurrence of each metric.
"""

import logging
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional

logging.basicConfig(level=logging.INFO, format="%(message)s")

LOG_DIR = Path("/auto-round/log_dir")
OUTPUT_BASE_DIR = Path("/auto-round/.azure-pipelines/scripts/performance")

# Matches e.g. "tuning time 123.456"; the captured group is the duration.
_TUNING_TIME_RE = re.compile(r"tuning time ([0-9]+\.[0-9]+)")
# Matches e.g. "'peak_ram': 1.23GB, 'peak_vram': 4.56GB"; groups are (ram, vram).
_PEAK_MEMORY_RE = re.compile(r"'peak_ram':\s*([\d.]+)\s*GB,\s*'peak_vram':\s*([\d.]+)\s*GB")

# (label shown in the report, QuantMetrics attribute, relative tolerance)
_METRIC_CHECKS = (
    ("Tuning Time (s)", "tuning_time_s", 0.1),
    ("Peak RAM (GB)", "peak_ram_gb", 0.1),
    ("Peak VRAM (GB)", "peak_vram_gb", 0.05),
    ("Output Size (GB)", "output_size_gb", 0.05),
)


@dataclass
class QuantMetrics:
    """Metrics collected for one test mode; ``None`` means "not available"."""

    tuning_time_s: Optional[float] = None  # last "tuning time" value found in the log
    peak_ram_gb: Optional[float] = None  # last 'peak_ram' value found in the log
    peak_vram_gb: Optional[float] = None  # last 'peak_vram' value found in the log
    output_size_gb: Optional[float] = None  # total size of the mode's output directory


def _parse_rounded(text: str) -> Optional[float]:
    """Convert *text* to a float rounded to 4 decimals, or ``None`` if malformed.

    The memory pattern captures ``[\\d.]+``, which also accepts strings such as
    ``"1.2.3"`` that ``float`` rejects; those are reported as missing data
    instead of aborting the whole check with a ``ValueError``.
    """
    try:
        return round(float(text), 4)
    except ValueError:
        return None


def get_dir_size_gb(path: Path) -> float:
    """Return the combined size of all regular files below *path* in GiB.

    Args:
        path: Directory to measure (searched recursively).

    Returns:
        Size in units of 1024**3 bytes, rounded to 4 decimals. ``0.0`` when
        *path* does not exist or is not a directory.
    """
    # is_dir() is already False for a missing path, so one check covers both cases.
    if not path.is_dir():
        return 0.0

    total_bytes = sum(entry.stat().st_size for entry in path.rglob("*") if entry.is_file())
    return round(total_bytes / (1024**3), 4)


def parse_log_file(log_file: Path) -> QuantMetrics:
    """Extract tuning time and peak memory figures from a run log.

    Args:
        log_file: Path of the log written by the performance test.

    Returns:
        A ``QuantMetrics`` whose log-derived fields are filled from the *last*
        match of each pattern. Fields stay ``None`` when the file is missing,
        the pattern does not occur, or the captured number is malformed.
        ``output_size_gb`` is never set here.
    """
    metrics = QuantMetrics()

    try:
        # Console logs captured via tee can contain stray non-UTF-8 bytes;
        # replace them rather than crash, since only ASCII patterns are searched.
        content = log_file.read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        logging.warning(f"Log file not found: {log_file}")
        return metrics

    # Use findall to capture all occurrences and take the most recent one.
    time_matches = _TUNING_TIME_RE.findall(content)
    if time_matches:
        metrics.tuning_time_s = _parse_rounded(time_matches[-1])

    memory_matches = _PEAK_MEMORY_RE.findall(content)
    if memory_matches:
        last_ram, last_vram = memory_matches[-1]
        metrics.peak_ram_gb = _parse_rounded(last_ram)
        metrics.peak_vram_gb = _parse_rounded(last_vram)

    return metrics


def get_tuning_info(
    log_dir: Path = LOG_DIR, output_base_dir: Path = OUTPUT_BASE_DIR
) -> Dict[str, Dict[str, QuantMetrics]]:
    """Collect metrics for every model in both test modes.

    Args:
        log_dir: Directory containing ``perf_test_current.log`` and
            ``perf_test_baseline.log``.
        output_base_dir: Directory containing the ``current`` and ``baseline``
            output directories.

    Returns:
        ``{model: {"current": QuantMetrics, "baseline": QuantMetrics}}``.
    """
    summary: Dict[str, Dict[str, QuantMetrics]] = {}
    model_list = ["Qwen/Qwen3-0.6B"]

    for model in model_list:
        summary[model] = {}
        for test_mode in ["current", "baseline"]:
            log_file = log_dir / f"perf_test_{test_mode}.log"
            output_dir = output_base_dir / test_mode

            logging.info(f"Processing {log_file}...")

            metrics = parse_log_file(log_file)
            metrics.output_size_gb = get_dir_size_gb(output_dir)

            summary[model][test_mode] = metrics

    return summary


def compare_metric(
    metric_name: str, current: Optional[float], baseline: Optional[float], tolerance: float = 0.1
) -> bool:
    """Check that *current* lies within a relative *tolerance* of *baseline*.

    Args:
        metric_name: Label used in the log output.
        current: Value measured for the current run, or ``None`` if missing.
        baseline: Value measured for the baseline run, or ``None`` if missing.
        tolerance: Allowed relative deviation; ``0.1`` accepts ratios in
            ``[0.9, 1.1]``.

    Returns:
        ``True`` when ``current / baseline`` is inside the tolerance band.
        ``False`` when either value is missing, the baseline is 0, or the
        ratio is outside the band.
    """
    if current is None or baseline is None:
        logging.error(f" [-] {metric_name}: Incomplete data (Current: {current}, Baseline: {baseline})")
        return False

    if baseline == 0:
        logging.warning(f" [!] {metric_name}: Baseline is 0, cannot calculate ratio.")
        return False

    ratio = current / baseline
    diff_percent = (ratio - 1) * 100

    msg = f" [*] {metric_name:<20}: Current = {current:<8} | Baseline = {baseline:<8} (Diff: {diff_percent:+.2f}%)"

    if 1.0 - tolerance <= ratio <= 1.0 + tolerance:
        logging.info(f"{msg} -> PASS")
        return True

    logging.error(f"{msg} -> FAIL")
    return False


def check_performance(log_dir: Path = LOG_DIR, output_base_dir: Path = OUTPUT_BASE_DIR) -> None:
    """Compare current against baseline metrics and exit non-zero on regression.

    Args:
        log_dir: Forwarded to ``get_tuning_info``.
        output_base_dir: Forwarded to ``get_tuning_info``.

    Raises:
        SystemExit: With status 1 when any metric of any model is missing or
            outside its tolerance.
    """
    summary = get_tuning_info(log_dir, output_base_dir)
    all_passed = True

    for model, modes in summary.items():
        logging.info(f"\nEvaluating Model: {model}")
        logging.info("-" * 60)

        current = modes.get("current", QuantMetrics())
        baseline = modes.get("baseline", QuantMetrics())

        for label, attr, tolerance in _METRIC_CHECKS:
            # Deliberately no short-circuit: every metric is evaluated so the
            # report lists all failures, not just the first one.
            if not compare_metric(label, getattr(current, attr), getattr(baseline, attr), tolerance=tolerance):
                all_passed = False

    logging.info("=" * 60)
    if all_passed:
        # Tolerances differ per metric, so the message does not quote a single percentage.
        logging.info("✅ Performance check passed: All metrics are within their acceptable limits compared to baseline.")
    else:
        logging.error("❌ Performance check failed: Current metrics exceed acceptable limits compared to baseline.")
        sys.exit(1)


# NOTE(review): the script entry point is not part of the visible change;
# presumably the file ends by invoking check_performance() like this - confirm.
if __name__ == "__main__":
    check_performance()