Skip to content
Merged
1 change: 1 addition & 0 deletions devops/scripts/benchmarks/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/html/data.js
14 changes: 11 additions & 3 deletions devops/scripts/benchmarks/benches/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import dataclass
import os
import shutil
import subprocess
from pathlib import Path
from utils.result import BenchmarkMetadata, BenchmarkTag, Result
from options import options
Expand Down Expand Up @@ -68,7 +69,9 @@ def get_adapter_full_path():
False
), f"could not find adapter file {adapter_path} (and in similar lib paths)"

def run_bench(self, command, env_vars, ld_library=[], add_sycl=True):
def run_bench(
self, command, env_vars, ld_library=[], add_sycl=True, use_stdout=True
):
env_vars = env_vars.copy()
if options.ur is not None:
env_vars.update(
Expand All @@ -80,13 +83,18 @@ def run_bench(self, command, env_vars, ld_library=[], add_sycl=True):
ld_libraries = options.extra_ld_libraries.copy()
ld_libraries.extend(ld_library)

return run(
result = run(
command=command,
env_vars=env_vars,
add_sycl=add_sycl,
cwd=options.benchmark_cwd,
ld_library=ld_libraries,
).stdout.decode()
)

if use_stdout:
return result.stdout.decode()
else:
return result.stderr.decode()

def create_data_path(self, name, skip_data_dir=False):
if skip_data_dir:
Expand Down
223 changes: 223 additions & 0 deletions devops/scripts/benchmarks/benches/gromacs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
# Copyright (C) 2025 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
import subprocess
from pathlib import Path
from .base import Suite, Benchmark
from options import options
from utils.utils import git_clone, download, run, create_build_path
from utils.result import Result


class GromacsBench(Suite):

def git_url(self):
return "https://gitlab.com/gromacs/gromacs.git"

def git_tag(self):
return "v2025.1"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Release today :)

Suggested change
return "v2025.1"
return "v2025.2"

Should not matter much, though.


def grappa_url(self):
return "https://zenodo.org/record/11234002/files/grappa-1.5k-6.1M_rc0.9.tar.gz"

def grappa_file(self):
return Path(os.path.basename(self.grappa_url()))

def __init__(self, directory):
self.directory = Path(directory).resolve()
model_path = str(self.directory / self.grappa_file()).replace(".tar.gz", "")
self.grappa_dir = Path(model_path)
build_path = create_build_path(self.directory, "gromacs-build")
self.gromacs_build_path = Path(build_path)
self.gromacs_src = self.directory / "gromacs-repo"

def name(self):
return "Gromacs Bench"

def benchmarks(self) -> list[Benchmark]:
return [
GromacsBenchmark(self, "0006", "pme", "graphs"),
GromacsBenchmark(self, "0006", "pme", "eager"),
GromacsBenchmark(self, "0192", "rf", "graphs"),
GromacsBenchmark(self, "0192", "rf", "eager"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the 6 size for PME and 192 for RF? Can you put this in a comment

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've chosen just two random examples, one for rf and one for pme, as running all would be probably pointless. Maybe even one pme benchmark is enough, with and without graphs?

Copy link
Contributor

@EwanC EwanC Apr 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can see here the effects of different sizes https://www.iwocl.org/wp-content/uploads/iwocl-2025-andrey-alekseenko-sycl-graph.pdf (Slide 49 might be the most illustrative).

The different sizes definitely make a difference to showing the benefit of graphs, as the longer the kernel takes to execute the more that time will dominate performance compared to any overheads saved from graphs in scheduling small kernels.

If you want to pick a single size then 6 is I think a good choice, but I'd use that consistently across PME and RF, as jumping to a large 192 size won't really illustrate the benefits of graphs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd advocate to still have a large test case (192k is fine), at least as eager only. As Ewan noted, it's mostly kernel-bound and graphs don't show much benefit there, but it still could be relevant to, e.g., cover kernel overlap.

]

def setup(self):
self.gromacs_src = git_clone(
self.directory,
"gromacs-repo",
self.git_url(),
self.git_tag(),
)

# Build GROMACS
run(
[
"cmake",
f"-S {str(self.directory)}/gromacs-repo",
f"-B {self.gromacs_build_path}",
f"-DCMAKE_BUILD_TYPE=Release",
f"-DCMAKE_CXX_COMPILER=clang++",
f"-DCMAKE_C_COMPILER=clang",
f"-DGMX_GPU=SYCL",
f"-DGMX_SYCL_ENABLE_GRAPHS=ON",

This comment was marked as resolved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

f"-DGMX_FFT_LIBRARY=MKL",
f"-DGMX_BUILD_OWN_FFTW=ON",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Harmless but useless: we use MKL, so the question of building FFTW does not arise.

Suggested change
f"-DGMX_BUILD_OWN_FFTW=ON",

f"-DGMX_GPU_FFT_LIBRARY=MKL",
f"-DGMX_GPU_NB_CLUSTER_SIZE=8",
f"-DGMX_OPENMP=OFF",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For PVC we also should set DGMX_GPU_NB_NUM_CLUSTER_PER_CELL_X=1, see https://manual.gromacs.org/2025.1/install-guide/index.html#sycl-gpu-acceleration-for-intel-gpus.

Was that intentionally omitted because you want to reuse the same build across different Intel GPUS?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... We don't have a way of specifying gpu in the benchmarks right now. Before it was always PVC, but now we are also testing with BMG.

We probably need a method in utils or somewhere that will autodetect gpu type. Something like:

enum GpuTypes {
  INTEL_PVC,
  INTEL_BMG,
  OTHER // we can extend this with nvidia/amd gpus etc
  ...
}

options {
  gpu = OTHER
}

detect_gpu() {
  if options.sycl is None:
    return OTHER
  output = run(`sycl-ls --verbose`)
  default_gpu = re.search(output, ...);
  if default_gpu.contains(Data Center GPU Max):
    return PVC
  if default_gpu.contains(BMG?)
    return BMG

  return OTHER
}

somewhere in main:
options.gpu = detect_gpu()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like useful functionality to add, but probably scope creep for this PR. Could have a TODO comment about adding this in once the script can detect the device being targeted.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. For now I suggest we add -DGMX_GPU_NB_NUM_CLUSTER_PER_CELL_X=1 and leave a TODO to make it conditional later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. For now I suggest we add -DGMX_GPU_NB_NUM_CLUSTER_PER_CELL_X=1 and leave a TODO to make it conditional later.

FYI, this flag is not critical either way. It improves the performance slightly on PVC, and is at least compatible with BMG (whether it improves the performance or not is an open question).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another minor suggestion is to add -DGMX_CYCLE_SUBCOUNTERS=ON for more detailed time breakdown in the log files; won't have much direct effect, but if there are any performance anomalies, the md.log files might become slightly more useful (if they are preserved).

],
add_sycl=True,
)
run(
f"cmake --build {self.gromacs_build_path} -j {options.build_jobs}",
add_sycl=True,
)
download(
self.directory,
self.grappa_url(),
self.directory / self.grappa_file(),
checksum="cc02be35ba85c8b044e47d097661dffa8bea57cdb3db8b5da5d01cdbc94fe6c8902652cfe05fb9da7f2af0698be283a2",
untar=True,
)

def teardown(self):
pass


class GromacsBenchmark(Benchmark):
def __init__(self, suite, model, type, option):
self.suite = suite
self.model = model # The model name (e.g., "0001.5")
self.type = type
self.gromacs_src = suite.gromacs_src
self.grappa_dir = suite.grappa_dir
self.gmx_path = suite.gromacs_build_path / "bin" / "gmx"
self.option = option

if self.type == "pme":
self.extra_args = [
"-pme",
"gpu",
"-pmefft",
"gpu",
"-notunepme",
]
else:
self.extra_args = []

def name(self):
return f"gromacs-{self.model}-{self.type}-{self.option}"

def setup(self):
if self.type != "rf" and self.type != "pme":
raise ValueError(f"Unknown benchmark type: {self.type}")

if self.option != "graphs" and self.option != "eager":
raise ValueError(f"Unknown option: {self.option}")

if not self.gmx_path.exists():
raise FileNotFoundError(f"gmx executable not found at {self.gmx_path}")

model_dir = self.grappa_dir / self.model

if not model_dir.exists():
raise FileNotFoundError(f"Model directory not found: {model_dir}")

cmd_list = [
str(self.gmx_path),
"grompp",
"-f",
f"{str(self.grappa_dir)}/{self.type}.mdp",
"-c",
str(model_dir / "conf.gro"),
"-p",
str(model_dir / "topol.top"),
"-o",
f"{str(model_dir)}/{self.type}.tpr",
]

# Generate configuration files
self.conf_result = run(
cmd_list,
add_sycl=True,
)

def run(self, env_vars):
model_dir = self.grappa_dir / self.model

env_vars.update({"SYCL_CACHE_PERSISTENT": "1"})

if self.option == "graphs":
env_vars.update({"GMX_CUDA_GRAPH": "1"})

# Run benchmark
command = [
str(self.gmx_path),
"mdrun",
"-s",
f"{str(model_dir)}/{self.type}.tpr",
"-nb",
"gpu",
"-update",
"gpu",
"-bonded",
"gpu",
"-ntmpi",
"1",
"-ntomp",
"1",
"-nobackup",
"-noconfout",
"-nstlist",
"100",
"-pin",
"on",

This comment was marked as resolved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

] + self.extra_args

mdrun_output = self.run_bench(
command,
env_vars,
add_sycl=True,
use_stdout=False,
)
time = self._extract_execution_time(mdrun_output)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to also have some correctness checks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case of non-zero exit code, CalledProcessError will be raised and time extraction will not happen. Do you think of some extra means here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about additional correctness validation (e.g., I had issues with early versions of graphs where some operations were not captured: the simulation won't necessarily crash, but the results would be off). Running the full test suite would be an overkill, but, e.g., an easy test here could be to grep the md.log for Conserved energy drift value. If abs(drift) < 1e-3, then things are roughtly okay (the threshold value is not universal, but that's what in my experience is ok for the Grappa set).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. The value 1e-3 does not work for eager rf, so the verification is added for pme only.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The value 1e-3 does not work for eager rf, so the verification is added for pme only.

That's a bit suspicious. Do you have at hand is the drift value for RF? A small system + a short run, it could be more variable than usual; but if we're talking about ~1e0 and up, that's way broken.


if options.verbose:
print(f"[{self.name()}] Time: {time:.3f} seconds")

return [
Result(
label=f"{self.name()}",
value=time,
unit="s",
command=" ".join(map(str, command)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If data.js is up-to-date, this leads to

  {
    "results": [
      {
        "label": "gromacs-0006-pme",
        "value": 2.026,
        "command": [
          "/",
          "h",
          "o",
          "m",
          "e",
          "/",
          "m",
          "a",
          "t",
          "e",
          "u",
          "s",
....

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this should just be command=command.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

env=env_vars,
stdout=mdrun_output,
git_url=self.suite.git_url(),
git_hash=self.suite.git_tag(),
)
]

def _extract_execution_time(self, log_content):
# Look for the line containing "Time:"
# and extract the first numeric value after it
time_lines = [line for line in log_content.splitlines() if "Time:" in line]

if len(time_lines) != 1:
raise ValueError(
f"Expected exactly 1 line containing 'Time:' in the log content, "
f"but found {len(time_lines)}."
)

for part in time_lines[0].split():
if part.replace(".", "", 1).isdigit():
return float(part)

raise ValueError(f"No numeric value found in the 'Time:' line.")

def teardown(self):
pass
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def git_info_from_path(path: Path) -> (str, str):
)

compute_runtime = (
options.compute_runtime_tag if options.build_compute_runtime else None
options.compute_runtime_tag if options.build_compute_runtime else "Unknown"
)

return BenchmarkRun(
Expand Down
Loading
Loading