diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
new file mode 100644
index 0000000000..c6e32fc9ef
--- /dev/null
+++ b/.github/workflows/codspeed.yml
@@ -0,0 +1,30 @@
+name: CodSpeed Benchmarks
+
+on:
+  push:
+    branches:
+      - "main" # or "master"
+  pull_request:
+  # `workflow_dispatch` allows CodSpeed to trigger backtest
+  # performance analysis in order to generate initial data.
+  workflow_dispatch:
+
+jobs:
+  benchmarks:
+    name: Run benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0 # grab all branches and tags
+      - name: Set up Python
+        uses: actions/setup-python@v6
+      - name: Install Hatch
+        run: |
+          python -m pip install --upgrade pip
+          pip install hatch
+      - name: Run the benchmarks
+        uses: CodSpeedHQ/action@v4
+        with:
+          mode: instrumentation
+          run: hatch run test.py3.11-1.26-minimal:run-benchmark
diff --git a/changes/3562.misc.md b/changes/3562.misc.md
new file mode 100644
index 0000000000..e164ab39f8
--- /dev/null
+++ b/changes/3562.misc.md
@@ -0,0 +1 @@
+Add continuous performance benchmarking infrastructure.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7f14971396..c6a6b20f5e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,6 +83,8 @@ test = [
     'numpydoc',
     "hypothesis",
     "pytest-xdist",
+    "pytest-benchmark",
+    "pytest-codspeed",
     "packaging",
     "tomlkit",
     "uv",
@@ -181,6 +183,7 @@ run-pytest = "run"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
 run-hypothesis = "run-coverage -nauto --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
+run-benchmark = "pytest --benchmark-enable tests/benchmarks"
 list-env = "pip list"
 
 [tool.hatch.envs.gputest]
@@ -405,7 +408,12 @@ doctest_optionflags = [
     "IGNORE_EXCEPTION_DETAIL",
 ]
 addopts = [
-    "--durations=10", "-ra", "--strict-config", "--strict-markers",
+    "--benchmark-columns", "min,mean,stddev,outliers,rounds,iterations",
+    "--benchmark-group-by", "group",
+    "--benchmark-warmup", "on",
+    "--benchmark-disable", # run benchmark routines but don't do benchmarking
+    "--durations", "10",
+    "-ra", "--strict-config", "--strict-markers",
 ]
 filterwarnings = [
     "error",
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/benchmarks/common.py b/tests/benchmarks/common.py
new file mode 100644
index 0000000000..e8809156a6
--- /dev/null
+++ b/tests/benchmarks/common.py
@@ -0,0 +1,8 @@
+from dataclasses import dataclass
+
+
+@dataclass(kw_only=True, frozen=True)
+class Layout:
+    shape: tuple[int, ...]
+    chunks: tuple[int, ...]
+    shards: tuple[int, ...] | None
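The benchmarks in test_e2e.py below parametrize `store` indirectly, which relies on a `store` fixture defined in the test suite's shared conftest; that fixture is not part of this diff. A minimal sketch of what such a fixture could look like, assuming zarr's `MemoryStore` and `LocalStore` (illustrative only, not the repository's actual implementation):

    # conftest.py -- hypothetical sketch; the real fixture lives in the
    # repository's existing conftest and is not shown in this diff.
    from pathlib import Path

    import pytest

    from zarr.abc.store import Store
    from zarr.storage import LocalStore, MemoryStore


    @pytest.fixture
    def store(request: pytest.FixtureRequest, tmp_path: Path) -> Store:
        # Map the indirect parametrization values ("memory", "local")
        # used by the benchmark tests to concrete store instances.
        if request.param == "memory":
            return MemoryStore()
        if request.param == "local":
            return LocalStore(tmp_path)
        raise ValueError(f"unknown store type: {request.param!r}")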
diff --git a/tests/benchmarks/test_e2e.py b/tests/benchmarks/test_e2e.py
new file mode 100644
index 0000000000..1a5a265646
--- /dev/null
+++ b/tests/benchmarks/test_e2e.py
@@ -0,0 +1,79 @@
+"""
+Benchmarks for end-to-end read/write performance of Zarr
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from tests.benchmarks.common import Layout
+
+if TYPE_CHECKING:
+    from pytest_benchmark.fixture import BenchmarkFixture
+
+    from zarr.abc.store import Store
+    from zarr.core.common import NamedConfig
+from operator import getitem, setitem
+from typing import Any, Literal
+
+import pytest
+
+from zarr import create_array
+
+CompressorName = Literal["gzip"] | None
+
+compressors: dict[CompressorName, NamedConfig[Any, Any] | None] = {
+    None: None,
+    "gzip": {"name": "gzip", "configuration": {"level": 1}},
+}
+
+
+layouts: tuple[Layout, ...] = (
+    Layout(shape=(1024**2,), chunks=(1024,), shards=None),
+    Layout(shape=(1024**2,), chunks=(1024,), shards=(1024,)),
+    Layout(shape=(1024**2,), chunks=(1024,), shards=(1024 * 64,)),
+)
+
+
+@pytest.mark.parametrize("compression_name", [None, "gzip"])
+@pytest.mark.parametrize("layout", layouts, ids=str)
+@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"])
+def test_write_array(
+    store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture
+) -> None:
+    """
+    Test the time required to fill an array with a single value
+    """
+    arr = create_array(
+        store,
+        dtype="uint8",
+        shape=layout.shape,
+        chunks=layout.chunks,
+        shards=layout.shards,
+        compressors=compressors[compression_name],  # type: ignore[arg-type]
+        fill_value=0,
+    )
+
+    benchmark(setitem, arr, Ellipsis, 1)
+
+
+@pytest.mark.parametrize("compression_name", [None, "gzip"])
+@pytest.mark.parametrize("layout", layouts, ids=str)
+@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"])
+def test_read_array(
+    store: Store, layout: Layout, compression_name: CompressorName, benchmark: BenchmarkFixture
+) -> None:
+    """
+    Test the time required to read an entire array
+    """
+    arr = create_array(
+        store,
+        dtype="uint8",
+        shape=layout.shape,
+        chunks=layout.chunks,
+        shards=layout.shards,
+        compressors=compressors[compression_name],  # type: ignore[arg-type]
+        fill_value=0,
+    )
+    arr[:] = 1
+    benchmark(getitem, arr, Ellipsis)
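The calls `benchmark(setitem, arr, Ellipsis, 1)` and `benchmark(getitem, arr, Ellipsis)` work because pytest-benchmark's `benchmark` fixture repeatedly invokes a callable with the given arguments, and `operator.setitem`/`operator.getitem` turn subscript expressions into plain callables. A standalone illustration of the equivalence (NumPy is used here only to provide a concrete array; it is not part of the diff):

    # operator.setitem/getitem make indexing expressions callable,
    # which is what lets the benchmark fixture time them directly.
    from operator import getitem, setitem

    import numpy as np

    arr = np.zeros((4,), dtype="uint8")
    setitem(arr, Ellipsis, 1)        # equivalent to: arr[...] = 1
    values = getitem(arr, Ellipsis)  # equivalent to: arr[...]
    assert values.max() == 1

Note also the division of labor in the pyproject.toml changes: `--benchmark-disable` in the default `addopts` makes an ordinary `pytest` run execute each benchmark body once as a regular test without collecting timings, while the `run-benchmark` hatch script passes `--benchmark-enable` to override that default, and the CodSpeed workflow invokes that script under instrumentation.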