diff --git a/jobs.py b/jobs.py
index 7ee238229..9b24c4eab 100755
--- a/jobs.py
+++ b/jobs.py
@@ -143,6 +143,19 @@
         "markers": "quicktest and not sr_disk_4k",
         "name_filter": "not linstor and not zfsvol",
     },
+    "storage-benchmarks": {
+        "description": "runs disk benchmark tests",
+        "requirements": [
+            "A local SR on host A1",
+            "A small VM that can be imported on the SR",
+            "Enough storage space to store the largest test file (numjobs*memory*2)G",
+        ],
+        "nb_pools": 1,
+        "params": {
+            "--vm": "single/small_vm",
+        },
+        "paths": ["tests/storage/benchmarks"],
+    },
     "linstor-main": {
         "description": "tests the linstor storage driver, but avoids migrations and reboots",
         "requirements": [
diff --git a/tests/storage/benchmarks/__init__.py b/tests/storage/benchmarks/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/storage/benchmarks/conftest.py b/tests/storage/benchmarks/conftest.py
new file mode 100644
index 000000000..86a058d48
--- /dev/null
+++ b/tests/storage/benchmarks/conftest.py
@@ -0,0 +1,178 @@
+import itertools
+import logging
+import os
+import tempfile
+import urllib.request
+from urllib.parse import urlparse
+from uuid import uuid4
+
+import pytest
+
+from lib.commands import SSHCommandFailed
+from lib.common import PackageManagerEnum
+
+from .helpers import load_results_from_csv, str_to_tuple
+
+MAX_LENGTH = 64 * (1024**3)  # 64GiB
+
+
+# TODO: use vhd, qcow2, raw... when image_format support will be available
+@pytest.fixture(scope="module", params=["vhd"])
+def image_format(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def running_unix_vm_with_fio(running_unix_vm):
+    vm = running_unix_vm
+    snapshot = vm.snapshot()
+
+    package_cmds = {
+        PackageManagerEnum.APT_GET.value: "apt-get update && apt install -y fio",
+        PackageManagerEnum.RPM.value: "yum install -y fio",
+        PackageManagerEnum.UNKNOWN.value: "apk add fio",
+    }
+
+    package_manager = vm.detect_package_manager().value
+    try:
+        vm.ssh(package_cmds[package_manager])
+        logging.info(f">> Installing fio with {package_cmds[package_manager]}")
+    except SSHCommandFailed as exc:
+        raise RuntimeError("Unsupported package manager: could not install fio") from exc
+
+    yield vm
+
+    # teardown
+    try:
+        snapshot.revert()
+    finally:
+        snapshot.destroy()
+
+
+@pytest.fixture(scope="module")
+def vdi_on_local_sr(host, local_sr_on_hostA1, image_format):
+    sr = local_sr_on_hostA1
+    vdi = sr.create_vdi("testVDI", MAX_LENGTH)  # , image_format=image_format)
+    logging.info(f">> Created VDI {vdi.uuid} of type {image_format}")
+
+    yield vdi
+
+    # teardown
+    logging.info(f"<< Destroying VDI {vdi.uuid}")
+    vdi.destroy()
+
+
+@pytest.fixture(scope="module")
+def plugged_vbd(vdi_on_local_sr, running_unix_vm_with_fio):
+    vm = running_unix_vm_with_fio
+    vdi = vdi_on_local_sr
+    vbd = vm.create_vbd("autodetect", vdi.uuid)
+
+    logging.info(f">> Plugging VDI {vdi.uuid} on VM {vm.uuid}")
+    vbd.plug()
+
+    yield vbd
+
+    # teardown
+    logging.info(f"<< Unplugging VDI {vdi.uuid} from VM {vm.uuid}")
+    vbd.unplug()
+    vbd.destroy()
+
+
+@pytest.fixture(scope="module")
+def local_temp_dir():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield tmpdir
+
+
+@pytest.fixture(scope="module")
+def temp_dir(running_unix_vm_with_fio):
+    vm = running_unix_vm_with_fio
+    tempdir = vm.ssh("mktemp -d")
+
+    yield tempdir
+
+    # teardown
+    vm.ssh(f"rm -r {tempdir}")
+
+
+def pytest_addoption(parser):
+    system_memory = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
+
+    parser.addoption(
+        "--prev-csv",
+        action="store",
+        default=None,
+        help="Path/URI to previous CSV results file for comparison",
+    )
+    parser.addoption(
+        "--block-sizes",
+        action="store",
+        type=lambda value: str_to_tuple(value, sep=","),
+        default=("4k", "16k", "64k", "1M"),
+        help="Comma separated values of block sizes to test in disk benchmarks",
+    )
+    parser.addoption(
+        "--file-sizes",
+        action="store",
+        type=lambda value: str_to_tuple(value, sep=","),
+        default=("1G", "4G", f"{int((system_memory // (1024.**3)) * 2)}G"),  # (2*Memory) GiB
+        help="Comma separated values of file sizes to test in disk benchmarks",
+    )
+    parser.addoption(
+        "--modes",
+        action="store",
+        type=lambda value: str_to_tuple(value, sep=","),
+        default=("read", "randread", "write", "randwrite"),
+        help="Comma separated values of rw_modes to test in disk benchmarks",
+    )
+    parser.addoption(
+        "--numjobs",
+        action="store", type=int,  # coerce: CLI values arrive as str otherwise
+        default=1,
+        help="Mapped to fio's --numjobs",
+    )
+    parser.addoption(
+        "--iodepth",
+        action="store", type=int,  # coerce: CLI values arrive as str otherwise
+        default=1,
+        help="Mapped to fio's --iodepth",
+    )
+    parser.addoption(
+        "--regression_threshold",
+        action="store", type=float,  # compared against a float diff; str would TypeError
+        default=10,
+        help="Percentage of regression that will cause the test to fail",
+    )
+    parser.addoption(
+        "--improvement_threshold",
+        action="store", type=float,  # compared against a float diff; str would TypeError
+        default=10,
+        help="Minimum percentage of improvement considered significant enough to report",
+    )
+
+
+def pytest_generate_tests(metafunc):
+    if {"block_size", "file_size", "rw_mode"} <= set(metafunc.fixturenames):
+        block_sizes = metafunc.config.getoption("block_sizes")
+        file_sizes = metafunc.config.getoption("file_sizes")
+        modes = metafunc.config.getoption("modes")
+
+        test_cases = list(itertools.product(block_sizes, file_sizes, modes))
+        metafunc.parametrize("block_size,file_size,rw_mode", test_cases)
+
+
+@pytest.fixture(scope="session")
+def prev_results(pytestconfig):
+    csv_uri = pytestconfig.getoption("--prev-csv")
+    if not csv_uri:
+        return {}
+    csv_path = csv_uri
+    if urlparse(csv_uri).scheme != "":
+        logging.info("Detected CSV path as an url")
+        csv_path = f"/tmp/{uuid4()}.csv"
+        urllib.request.urlretrieve(csv_uri, csv_path)
+        logging.info(f"Fetching CSV file from {csv_uri} to {csv_path}")
+    if not os.path.exists(csv_path):
+        raise FileNotFoundError(csv_path)
+    return load_results_from_csv(csv_path)
diff --git a/tests/storage/benchmarks/helpers.py b/tests/storage/benchmarks/helpers.py
new file mode 100644
index 000000000..ce34796ce
--- /dev/null
+++ b/tests/storage/benchmarks/helpers.py
@@ -0,0 +1,52 @@
+import csv
+import os
+import statistics
+from datetime import datetime
+
+system_memory = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
+
+
+def log_result_csv(test_type, rw_mode, result_json, csv_path):
+    job = result_json["jobs"][0]
+    op_data = job[rw_mode.replace("rand", "")]
+    bw_kbps = op_data["bw"]
+    iops = op_data["iops"]
+    latency = op_data["lat_ns"]["mean"]
+
+    result = {
+        "timestamp": datetime.now().isoformat(),
+        "test": test_type,
+        "mode": rw_mode,
+        "bw_MBps": round(bw_kbps / 1024, 2),
+        "IOPS": round(iops, 2),
+        "latency": round(latency, 2),
+    }
+
+    file_exists = os.path.exists(csv_path)
+    with open(csv_path, "a", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=result.keys())
+        if not file_exists:
+            writer.writeheader()
+        writer.writerow(result)
+
+    return result
+
+
+def load_results_from_csv(csv_path):
+    results = {}
+    with open(csv_path, newline="") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            key = (row["test"], row["mode"])
+            if key not in results:
+                results[key] = []
+            results[key].append(row)
+    return results
+
+
+def mean(data, key):
+    return statistics.mean([float(x[key]) for x in data if key in x])
+
+
+def str_to_tuple(value, sep=","):
+    return tuple(item.strip() for item in value.split(sep))
diff --git a/tests/storage/benchmarks/test_disk_perf.py b/tests/storage/benchmarks/test_disk_perf.py
new file mode 100644
index 000000000..075e9b09e
--- /dev/null
+++ b/tests/storage/benchmarks/test_disk_perf.py
@@ -0,0 +1,142 @@
+import json
+import logging
+import os
+import statistics
+from datetime import datetime
+
+import pytest
+
+from lib.commands import SSHCommandFailed
+
+from .helpers import load_results_from_csv, log_result_csv, mean
+
+CSV_FILE = f"/tmp/results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
+
+DEFAULT_SAMPLES_NUM = 10
+DEFAULT_SIZE = "1G"
+DEFAULT_BS = "4k"
+DEFAULT_IODEPTH = 1
+DEFAULT_NUMJOBS = 1
+DEFAULT_FILE = "fio-testfile"
+
+
+def run_fio(
+    vm,
+    test_name,
+    rw_mode,
+    temp_dir,
+    local_temp_dir,
+    bs=DEFAULT_BS,
+    iodepth=DEFAULT_IODEPTH,
+    size=DEFAULT_SIZE,
+    numjobs=DEFAULT_NUMJOBS,
+    file_path="",
+):
+    json_output_path = os.path.join(temp_dir, f"{test_name}.json")
+    local_json_path = os.path.join(local_temp_dir, f"{test_name}.json")
+    if not file_path:
+        file_path = os.path.join(temp_dir, DEFAULT_FILE)
+    fio_cmd = [
+        "fio",
+        f"--name={test_name}",
+        f"--rw={rw_mode}",
+        f"--bs={bs}",
+        f"--iodepth={iodepth}",
+        f"--size={size}",
+        f"--filename={file_path}",
+        "--direct=1",
+        "--end_fsync=1",
+        "--fsync_on_close=1",
+        f"--numjobs={numjobs}",
+        "--group_reporting",
+        "--output-format=json",
+        f"--output={json_output_path}",
+    ]
+    logging.debug(f"Running {fio_cmd}")
+    try:
+        vm.ssh(fio_cmd, check=True)
+    except SSHCommandFailed as e:
+        raise RuntimeError(f"fio failed for {test_name}:{e}") from e
+    vm.scp(json_output_path, local_json_path, local_dest=True)
+    logging.debug(f"Stored json at {local_json_path}")
+    with open(local_json_path) as f:
+        return json.load(f)
+
+
+def assert_performance_not_degraded(
+    current, previous, regression_threshold=10, improvement_threshold=10
+):
+    diffs = {}
+    for metric in ("bw_MBps", "IOPS", "latency"):
+        try:
+            curr = mean(current, metric)
+            prev = mean(previous, metric)
+        except statistics.StatisticsError:
+            logging.info(f"Missing metric ({metric}), skipping comparison")
+            continue
+        diff = (curr - prev if metric == "latency" else prev - curr) / prev * 100
+        if diff <= -improvement_threshold:  # negative diff means improvement
+            logging.info(f"{metric} improved by {abs(diff):.2f}%")
+        assert (
+            diff <= regression_threshold
+        ), f"{metric} regressed by {diff:.2f}% (allowed {regression_threshold}%)"
+        diffs[metric] = diff
+
+    logging.info("Performance difference summary:")
+    for k, v in diffs.items():
+        sign = "+" if v < 0 else "-"
+        logging.info(f"- {k}: {sign}{abs(v):.2f}%")
+
+
+class TestDiskPerf:
+
+    @pytest.mark.small_vm
+    def test_disk_benchmark(
+        self,
+        pytestconfig,
+        temp_dir,
+        local_temp_dir,
+        prev_results,
+        block_size,
+        file_size,
+        rw_mode,
+        running_unix_vm_with_fio,
+        plugged_vbd,
+        image_format,
+    ):
+        vm = running_unix_vm_with_fio
+        vbd = plugged_vbd
+        device = f"/dev/{vbd.param_get(param_name='device')}"
+        test_type = "bench-fio-{}-{}-{}-{}-{}-{}".format(
+            block_size,
+            file_size,
+            pytestconfig.getoption("iodepth"),
+            pytestconfig.getoption("numjobs"),
+            rw_mode,
+            image_format,
+        )
+
+        for i in range(DEFAULT_SAMPLES_NUM):
+            result = run_fio(
+                vm,
+                test_type,
+                rw_mode,
+                temp_dir,
+                local_temp_dir,
+                file_path=device,
+                bs=block_size,
+                size=file_size,
+                iodepth=pytestconfig.getoption("iodepth"),
+                numjobs=pytestconfig.getoption("numjobs"),
+            )
+            summary = log_result_csv(test_type, rw_mode, result, CSV_FILE)
+            assert summary["IOPS"] > 0
+        key = (test_type, rw_mode)
+        if prev_results and key in prev_results:
+            results = load_results_from_csv(CSV_FILE)
+            assert_performance_not_degraded(
                results[key],
+                prev_results[key],
+                regression_threshold=pytestconfig.getoption("regression_threshold"),
+                improvement_threshold=pytestconfig.getoption("improvement_threshold"),
+            )