Skip to content

Commit acdc157

Browse files
Implement reference benchmark
1 parent 8b1664c commit acdc157

File tree

5 files changed

+163
-8
lines changed

5 files changed

+163
-8
lines changed

infrastructure/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Each YAML file defines a benchmark with specific configuration:
128128
| `enable_gcs_upload` | Enable/disable results upload to GCS bucket | true |
129129
| `gcs_bucket_name` | Name of the GCS bucket to upload the results | solver-benchmarks |
130130
| `auto_destroy_vm` | Enable/disable auto deletion of VM on benchmark completion | true |
131+
| `reference_benchmark_interval` | Time interval in seconds to run the reference benchmark | 3600 |
131132
| `ssh_user` | SSH username | "" |
132133
| `ssh_key_path` | Path to SSH public key | "" |
133134

infrastructure/main.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ variable "auto_destroy_vm" {
7373
default = true
7474
}
7575

76+
variable "reference_benchmark_interval" {
77+
description = "Time interval in seconds for running reference benchmarks (0 disables reference benchmarks)"
78+
type = number
79+
default = 3600
80+
}
81+
7682
locals {
7783
benchmark_files = fileset("${path.module}/benchmarks", "*.yaml*")
7884

@@ -124,6 +130,7 @@ resource "google_compute_instance" "benchmark_instances" {
124130
auto_destroy_vm = tostring(var.auto_destroy_vm)
125131
project_id = var.project_id
126132
zone = var.zone
133+
reference_benchmark_interval = tostring(var.reference_benchmark_interval)
127134
}
128135

129136
# Add the startup script from external file

infrastructure/startup-script.sh

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,17 @@ apt-get install -y tmux git time curl jq
1212
echo "Cloning repository..."
1313
git clone https://github.com/open-energy-transition/solver-benchmark.git
1414

15+
# Install a global highs binary for reference runs
16+
echo "Installing Highs..."
17+
mkdir -p /opt/highs/bin
18+
curl -L "https://github.com/JuliaBinaryWrappers/HiGHSstatic_jll.jl/releases/download/HiGHSstatic-v1.10.0%2B0/HiGHSstatic.v1.10.0.x86_64-linux-gnu-cxx11.tar.gz" -o HiGHSstatic.tar.gz
19+
tar -xzf HiGHSstatic.tar.gz -C /opt/highs/
20+
chmod +x /opt/highs/bin/highs
21+
/opt/highs/bin/highs --version
22+
23+
# Downloading benchmark reference model
24+
curl -L "https://storage.googleapis.com/solver-benchmarks/benchmark-test-model.lp" -o benchmark-test-model.lp
25+
1526
# Install Miniconda
1627
echo "Installing Miniconda..."
1728
mkdir -p ~/miniconda3
@@ -36,6 +47,10 @@ echo "Parsed benchmark years: ${BENCHMARK_YEARS_STR}"
3647
BENCHMARK_FILE=$(curl -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/benchmark_file")
3748
echo "Using benchmark file: ${BENCHMARK_FILE}"
3849

50+
# Get reference benchmark interval from instance metadata
51+
REFERENCE_BENCHMARK_INTERVAL=$(curl -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/reference_benchmark_interval")
52+
echo "Reference benchmark interval: ${REFERENCE_BENCHMARK_INTERVAL} seconds"
53+
3954
# Get benchmark content
4055
BENCHMARK_CONTENT=$(curl -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/benchmark_content")
4156

@@ -49,7 +64,7 @@ chmod +x ./runner/benchmark_all.sh
4964
# Run the benchmark_all.sh script with our years
5065
echo "Starting benchmarks for years: ${BENCHMARK_YEARS_STR}"
5166
source ~/miniconda3/bin/activate
52-
./runner/benchmark_all.sh -y "${BENCHMARK_YEARS_STR}" ./benchmarks/${BENCHMARK_FILE}
67+
./runner/benchmark_all.sh -y "${BENCHMARK_YEARS_STR}" -r "${REFERENCE_BENCHMARK_INTERVAL}" ./benchmarks/"${BENCHMARK_FILE}"
5368
BENCHMARK_EXIT_CODE=$?
5469

5570
if [ $BENCHMARK_EXIT_CODE -ne 0 ]; then

runner/benchmark_all.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@ set -euo pipefail
44

55
# Parse command line arguments
66
usage() {
7-
echo "Usage: $0 [-a] [-y \"<space separated years>\"] <benchmarks yaml file>"
7+
echo "Usage: $0 [-a] [-y \"<space separated years>\"] [-r <seconds>] <benchmarks yaml file>"
88
echo "Runs the solvers from the specified years (default all) on the benchmarks in the given file"
99
echo "Options:"
1010
echo " -a Append to the results CSV file instead of overwriting. Default: overwrite"
1111
echo " -y A space separated string of years to run. Default: 2020 2021 2022 2023 2024"
12+
echo " -r Reference benchmark interval in seconds. Default: 0 (disabled)"
1213
}
1314
overwrite_results="true"
1415
years=(2020 2021 2022 2023 2024)
15-
while getopts "hay:" flag
16+
reference_interval=0 # Default: disabled
17+
while getopts "hay:r:" flag
1618
do
1719
case ${flag} in
1820
h) usage
@@ -23,6 +25,9 @@ do
2325
;;
2426
y) IFS=', ' read -r -a years <<< "$OPTARG"
2527
;;
28+
r) reference_interval="$OPTARG"
29+
echo "Reference benchmark will run every $reference_interval seconds"
30+
;;
2631
esac
2732
done
2833
shift $(($OPTIND - 1))
@@ -52,9 +57,9 @@ for year in "${years[@]}"; do
5257
echo "Running benchmarks for the year: $year"
5358
conda activate "$env_name"
5459
if [ "$idx" -eq 0 ]; then
55-
python "$BENCHMARK_SCRIPT" "$BENCHMARKS_FILE" "$year" "$overwrite_results"
60+
python "$BENCHMARK_SCRIPT" "$BENCHMARKS_FILE" "$year" "$overwrite_results" "$reference_interval"
5661
else
57-
python "$BENCHMARK_SCRIPT" "$BENCHMARKS_FILE" "$year" false
62+
python "$BENCHMARK_SCRIPT" "$BENCHMARKS_FILE" "$year" false "$reference_interval"
5863
fi
5964
conda deactivate
6065

runner/run_benchmarks.py

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
import gzip
33
import json
44
import os
5+
import re
56
import shutil
67
import statistics
78
import subprocess
89
import sys
10+
import time
911
from pathlib import Path
1012

1113
import requests
@@ -229,17 +231,101 @@ def benchmark_solver(input_file, solver_name, timeout):
229231
return metrics
230232

231233

234+
def get_highs_binary_version():
235+
"""Get the version of the HiGHS binary from the --version command"""
236+
highs_binary = "/opt/highs/bin/highs"
237+
238+
try:
239+
result = subprocess.run(
240+
[highs_binary, "--version"],
241+
capture_output=True,
242+
text=True,
243+
check=True,
244+
encoding="utf-8",
245+
)
246+
247+
version_match = re.search(r"HiGHS version (\d+\.\d+\.\d+)", result.stdout)
248+
if version_match:
249+
return version_match.group(1)
250+
251+
return "unknown"
252+
except Exception as e:
253+
print(f"Error getting HiGHS binary version: {str(e)}")
254+
return "unknown"
255+
256+
257+
def benchmark_highs_binary():
258+
"""
259+
Run a reference benchmark using the pre-installed HiGHS binary
260+
"""
261+
reference_model = "/benchmark-test-model.lp"
262+
highs_binary = "/opt/highs/bin/highs"
263+
264+
command = [
265+
highs_binary,
266+
reference_model,
267+
]
268+
269+
# Run the command and capture the output
270+
start_time = time.time()
271+
result = subprocess.run(
272+
command,
273+
capture_output=True,
274+
text=True,
275+
check=False,
276+
encoding="utf-8",
277+
)
278+
runtime = time.time() - start_time
279+
if result.returncode != 0:
280+
print(
281+
f"ERROR running reference benchmark. Captured output:\n{result.stdout}\n{result.stderr}"
282+
)
283+
metrics = {
284+
"status": "ER",
285+
"condition": "Error",
286+
"objective": None,
287+
"runtime": runtime,
288+
"duality_gap": None,
289+
"max_integrality_violation": None,
290+
}
291+
else:
292+
# Parse HiGHS output to extract objective value
293+
objective = None
294+
for line in result.stdout.splitlines():
295+
if "Objective value" in line:
296+
try:
297+
objective = float(line.split(":")[-1].strip())
298+
except (ValueError, IndexError):
299+
pass
300+
301+
metrics = {
302+
"status": "OK",
303+
"condition": "Optimal",
304+
"objective": objective,
305+
"runtime": runtime,
306+
"memory": "N/A",
307+
"duality_gap": None, # Not available from command line output
308+
"max_integrality_violation": None, # Not available from command line output
309+
}
310+
311+
return metrics
312+
313+
232314
def main(
233315
benchmark_yaml_path,
234316
solvers,
235317
year=None,
236318
iterations=1,
237319
timeout=10 * 60,
320+
reference_interval=0, # Default: disabled
238321
override=True,
239322
):
240323
size_categories = None # TODO add this to CLI args
241324
results = {}
242325

326+
# Track the last time we ran the reference benchmark
327+
last_reference_run = 0
328+
243329
# Load benchmarks from YAML file
244330
with open(benchmark_yaml_path, "r") as file:
245331
yaml_content = yaml.safe_load(file)
@@ -360,24 +446,65 @@ def main(
360446
results[(benchmark["name"], benchmark["size"], solver, solver_version)] = (
361447
metrics
362448
)
449+
450+
# Check if we should run the reference benchmark based on the interval
451+
if reference_interval > 0:
452+
current_time = time.time()
453+
time_since_last_run = current_time - last_reference_run
454+
455+
if last_reference_run == 0 or time_since_last_run >= int(
456+
reference_interval
457+
):
458+
print(
459+
f"Running reference benchmark with HiGHS binary (interval: {reference_interval}s)...",
460+
flush=True,
461+
)
462+
reference_metrics = benchmark_highs_binary()
463+
464+
# Add required fields to reference metrics
465+
reference_metrics["size"] = "reference"
466+
reference_metrics["solver"] = "highs-binary"
467+
reference_metrics["solver_version"] = get_highs_binary_version()
468+
reference_metrics["solver_release_year"] = "N/A"
469+
470+
# Record reference benchmark results
471+
write_csv_row(results_csv, "reference-benchmark", reference_metrics)
472+
473+
# Update the last reference run time
474+
last_reference_run = current_time
475+
else:
476+
print(
477+
f"Skipping reference benchmark (last run {time_since_last_run:.1f}s ago, interval: {reference_interval}s)",
478+
flush=True,
479+
)
480+
363481
return results
364482

365483

366484
if __name__ == "__main__":
367-
# Check for benchmark file argument and optional year and override arguments
485+
# Check for benchmark file argument and optional year, override, and reference interval arguments
368486
if len(sys.argv) < 3:
369487
raise ValueError(
370-
"Usage: python run_benchmarks.py <path_to_benchmarks.yaml> [<year>] [<override>]"
488+
"Usage: python run_benchmarks.py <path_to_benchmarks.yaml> [<year>] [<override>] [<reference_interval>]"
371489
)
372490
sys.exit(1)
373491

374492
benchmark_yaml_path = sys.argv[1]
375493
year = sys.argv[2] if len(sys.argv) > 2 else None
376494
override = sys.argv[3].lower() == "true" if len(sys.argv) > 3 else True
495+
reference_interval = (
496+
int(sys.argv[4]) if len(sys.argv) > 4 else 0
497+
) # Default: disabled
377498

378499
# solvers = ["highs", "glpk"] # For dev and testing
379500
solvers = ["highs", "scip", "cbc", "glpk"] # For production
380501

381-
main(benchmark_yaml_path, solvers, year, override=override)
502+
main(
503+
benchmark_yaml_path,
504+
solvers,
505+
year,
506+
reference_interval=reference_interval,
507+
override=override,
508+
)
382509
# Print a message indicating completion
383510
print("Benchmarking complete.")

0 commit comments

Comments (0)