Skip to content

Commit b373876

Browse files
authored
Merge pull request #13893 from wietzesuijker/perf/zarr-v3-first-probe
Zarr: probe zarr.json before v2 files in OpenRootGroup()
2 parents ffaaf93 + e31b1ae commit b373876

File tree

2 files changed: 61 additions and 60 deletions.

autotest/gdrivers/zarr_driver.py

Lines changed: 52 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6223,62 +6223,59 @@ def test_zarr_read_simple_sharding_network():
62236223
chunk_block = b"\x01\x02\x03\x04" + (16384 - 4) * b"\x00"
62246224

62256225
try:
6226-
# Loop twice: second iteration proves ClearMemoryCaches() lets the
6227-
# driver re-fetch everything cleanly.
6228-
for _ in range(2):
6229-
handler = webserver.SequentialHandler()
6230-
handler.add("GET", "/test.zarr/", 404)
6231-
handler.add("HEAD", "/test.zarr/.zmetadata", 404)
6232-
handler.add("HEAD", "/test.zarr/.zarray", 404)
6233-
handler.add("HEAD", "/test.zarr/.zgroup", 404)
6234-
handler.add(
6235-
"HEAD",
6236-
"/test.zarr/zarr.json",
6237-
200,
6238-
{"Content-Length": "%d" % len(zarr_json)},
6239-
)
6240-
handler.add(
6241-
"GET",
6242-
"/test.zarr/zarr.json",
6243-
200,
6244-
{"Content-Length": "%d" % len(zarr_json)},
6245-
zarr_json,
6246-
)
6247-
handler.add("HEAD", "/test.zarr/zarr.json.aux.xml", 404)
6248-
handler.add("HEAD", "/test.zarr/zarr.aux", 404)
6249-
handler.add("HEAD", "/test.zarr/zarr.AUX", 404)
6250-
handler.add("HEAD", "/test.zarr/zarr.json.aux", 404)
6251-
handler.add("HEAD", "/test.zarr/zarr.json.AUX", 404)
6252-
handler.add("HEAD", "/test.zarr/c/0/0", 200, {"Content-Length": "65536"})
6253-
handler.add(
6254-
"GET",
6255-
"/test.zarr/c/0/0",
6256-
206,
6257-
{
6258-
"Content-Length": "16384",
6259-
"Content-Range": "bytes 49152-65535/65536",
6260-
},
6261-
shard_tail,
6262-
expected_headers={"Range": "bytes=49152-65535"},
6263-
)
6264-
handler.add(
6265-
"GET",
6266-
"/test.zarr/c/0/0",
6267-
206,
6268-
{
6269-
"Content-Length": "16384",
6270-
"Content-Range": "bytes 0-16383/65536",
6271-
},
6272-
chunk_block,
6273-
expected_headers={"Range": "bytes=0-16383"},
6274-
)
6275-
with webserver.install_http_handler(handler):
6276-
ds = gdal.Open(
6277-
'ZARR:"/vsicurl/http://localhost:%d/test.zarr"' % webserver_port
6226+
with gdaltest.config_options({"GDAL_PAM_ENABLED": "NO"}):
6227+
# Loop twice: second iteration proves ClearMemoryCaches() lets the
6228+
# driver re-fetch everything cleanly.
6229+
for _ in range(2):
6230+
handler = webserver.SequentialHandler()
6231+
handler.add("GET", "/test.zarr/", 404)
6232+
handler.add("HEAD", "/test.zarr/.zmetadata", 404)
6233+
# Probe zarr.json first; v2 probes (.zarray, .zgroup) skipped.
6234+
handler.add(
6235+
"HEAD",
6236+
"/test.zarr/zarr.json",
6237+
200,
6238+
{"Content-Length": "%d" % len(zarr_json)},
6239+
)
6240+
handler.add(
6241+
"GET",
6242+
"/test.zarr/zarr.json",
6243+
200,
6244+
{"Content-Length": "%d" % len(zarr_json)},
6245+
zarr_json,
6246+
)
6247+
handler.add(
6248+
"HEAD", "/test.zarr/c/0/0", 200, {"Content-Length": "65536"}
6249+
)
6250+
handler.add(
6251+
"GET",
6252+
"/test.zarr/c/0/0",
6253+
206,
6254+
{
6255+
"Content-Length": "16384",
6256+
"Content-Range": "bytes 49152-65535/65536",
6257+
},
6258+
shard_tail,
6259+
expected_headers={"Range": "bytes=49152-65535"},
6260+
)
6261+
handler.add(
6262+
"GET",
6263+
"/test.zarr/c/0/0",
6264+
206,
6265+
{
6266+
"Content-Length": "16384",
6267+
"Content-Range": "bytes 0-16383/65536",
6268+
},
6269+
chunk_block,
6270+
expected_headers={"Range": "bytes=0-16383"},
62786271
)
6279-
assert ds.GetRasterBand(1).ReadBlock(0, 0) == b"\x01\x02\x03\x04"
6280-
ds = None
6281-
gdal.ClearMemoryCaches()
6272+
with webserver.install_http_handler(handler):
6273+
ds = gdal.Open(
6274+
'ZARR:"/vsicurl/http://localhost:%d/test.zarr"' % webserver_port
6275+
)
6276+
assert ds.GetRasterBand(1).ReadBlock(0, 0) == b"\x01\x02\x03\x04"
6277+
ds = None
6278+
gdal.ClearMemoryCaches()
62826279

62836280
finally:
62846281
webserver.server_stop(webserver_process, webserver_port)

frmts/zarr/zarr_sharedresource.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ ZarrSharedResource::~ZarrSharedResource()
8989

9090
std::shared_ptr<ZarrGroupBase> ZarrSharedResource::OpenRootGroup()
9191
{
92+
// Probe zarr.json first so v3 datasets skip the v2 stat cascade.
93+
const std::string osZarrJsonFilename(CPLFormFilenameSafe(
94+
m_osRootDirectoryName.c_str(), "zarr.json", nullptr));
95+
VSIStatBufL sStat;
96+
const bool bHasZarrJson =
97+
(VSIStatL(osZarrJsonFilename.c_str(), &sStat) == 0);
98+
99+
if (!bHasZarrJson)
92100
{
93101
auto poRG = ZarrV2Group::Create(shared_from_this(), std::string(), "/");
94102
// Prevents potential recursion
@@ -98,7 +106,6 @@ std::shared_ptr<ZarrGroupBase> ZarrSharedResource::OpenRootGroup()
98106

99107
const std::string osZarrayFilename(CPLFormFilenameSafe(
100108
m_osRootDirectoryName.c_str(), ".zarray", nullptr));
101-
VSIStatBufL sStat;
102109
const auto nErrorCount = CPLGetErrorCounter();
103110
if (VSIStatL(osZarrayFilename.c_str(), &sStat) == 0)
104111
{
@@ -184,10 +191,7 @@ std::shared_ptr<ZarrGroupBase> ZarrSharedResource::OpenRootGroup()
184191
m_poWeakRootGroup = poRG_V3;
185192
poRG_V3->SetUpdatable(m_bUpdatable);
186193

187-
const std::string osZarrJsonFilename(CPLFormFilenameSafe(
188-
m_osRootDirectoryName.c_str(), "zarr.json", nullptr));
189-
VSIStatBufL sStat;
190-
if (VSIStatL(osZarrJsonFilename.c_str(), &sStat) == 0)
194+
if (bHasZarrJson)
191195
{
192196
CPLJSONDocument oDoc;
193197
if (!oDoc.Load(osZarrJsonFilename))

Comments: 0 commit comments