@@ -198,6 +198,7 @@ def test_sharding_partial_read(
     assert np.all(read_data == 1)


+@pytest.mark.skip("This is profiling rather than a test")
 @pytest.mark.slow_hypothesis
 @pytest.mark.parametrize("store", ["local"], indirect=["store"])
 def test_partial_shard_read_performance(store: Store) -> None:
@@ -230,21 +231,29 @@ def test_partial_shard_read_performance(store: Store) -> None:
 
     num_calls = 20
     experiments = []
-    for concurrency, statement in product([1, 10, 100], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]):
+    for concurrency, coalesce_max_gap, statement in product(
+        [1, 10, 100], [-1, 2**20, 10 * 2**20], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]
+    ):
         store_mock.reset_mock()
-        zarr.config.set({"async.concurrency": concurrency})
+        zarr.config.set(
+            {
+                "async.concurrency": concurrency,
+                "sharding.read.coalesce_max_gap_bytes": coalesce_max_gap,
+            }
+        )
         # Each timeit call accesses a 512x512 slice covering 64 chunks
         time = timeit(statement, number=num_calls, globals={"a": a}) / num_calls
         experiments.append(
             {
                 "concurrency": concurrency,
+                "coalesce_max_gap": coalesce_max_gap,
                 "statement": statement,
                 "time": time,
                 "store_get_calls": store_mock.get.call_count,
             }
         )
 
-    with open("zarr-python-partial-shard-read-performance.json", "w") as f:
+    with open("zarr-python-partial-shard-read-performance-with-coalesce.json", "w") as f:
         json.dump(experiments, f)
 
 
0 commit comments