Skip to content

Commit 01b12a8

Browse files
committed
feature: support zep8 strings when opening groups and arrays
1 parent 926a52f commit 01b12a8

File tree

16 files changed

+2160
-13
lines changed

16 files changed

+2160
-13
lines changed

examples/zep8_url_demo.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
"""
2+
ZEP 8 URL Syntax Demo
3+
4+
This script demonstrates ZEP 8 URL syntax for chained store access in zarr-python.
5+
ZEP 8 URLs allow you to chain different storage adapters using pipe (|) syntax.
6+
7+
Examples of ZEP 8 URLs:
8+
- "memory:" - Simple in-memory store
9+
- "file:/path/data.zip|zip:" - ZIP file access
10+
- "s3://bucket/data.zip|zip:|zarr3:" - Cloud ZIP with zarr3 format
11+
- "file:/path/repo|icechunk:branch:main" - Icechunk repository (if available)
12+
13+
For comprehensive Icechunk integration examples, see the icechunk repository tests.
14+
"""
15+
16+
import tempfile
17+
from pathlib import Path
18+
19+
import numpy as np
20+
21+
import zarr
22+
from zarr.core.url_syntax import URLParser, is_zep8_url
23+
from zarr.registry import get_store_adapter
24+
from zarr.storage import ZipStore
25+
26+
27+
def demo_basic_zep8() -> None:
    """Demonstrate basic ZEP 8 URL syntax features.

    Opens a group through a ``memory:`` URL and through a ``file:`` URL that
    points into a temporary directory, creates one small array in each, and
    prints a short summary. Any exception is caught and reported so the
    remaining demos can still run.
    """
    print("=== Basic ZEP 8 URL Demo ===")

    try:
        print("📝 Testing basic ZEP 8 URL formats")

        # Memory store
        print("\n1. Memory store:")
        memory_url = "memory:"
        root = zarr.open_group(memory_url, mode="w")
        arr = root.create_array("test_data", shape=(10,), dtype="f4")
        arr[:] = np.random.random(10)
        print(f"✅ Created array via {memory_url}")
        print(f" Data shape: {arr.shape}, dtype: {arr.dtype}")

        # File store
        print("\n2. File store:")
        with tempfile.TemporaryDirectory() as tmpdir:
            file_url = f"file:{tmpdir}/test.zarr"
            root2 = zarr.open_group(file_url, mode="w")
            arr2 = root2.create_array("persistent_data", shape=(20,), dtype="i4")
            arr2[:] = range(20)
            print(f"✅ Created array via {file_url}")
            # Convert to plain Python ints before printing: list() over a NumPy
            # array yields NumPy scalars, whose repr is "np.int32(0)" on NumPy >= 2.
            first_five = np.asarray(arr2[:5]).tolist()
            print(f" Data: {first_five}... (first 5 elements)")

    except Exception as e:
        # Demo boundary: report the failure instead of aborting the whole script.
        print(f"❌ Demo failed: {e}")
55+
56+
57+
def demo_zip_chaining() -> None:
    """Demonstrate ZIP file chaining with ZEP 8 URLs.

    Writes a small group into a ZIP archive through ``ZipStore``, closes the
    store, then re-opens the same archive read-only through the chained URL
    ``file:<path>|zip:`` and checks that the data read back matches what was
    written. Failures are caught and reported rather than raised.
    """
    print("\n=== ZIP Chaining Demo ===")

    try:
        print("📝 Creating ZIP file with zarr data, then accessing via ZEP 8 URL")

        with tempfile.TemporaryDirectory() as tmpdir:
            zip_path = Path(tmpdir) / "data.zip"

            # Step 1: Create ZIP file with zarr data
            print(f"Creating ZIP file at: {zip_path}")
            with ZipStore(str(zip_path), mode="w") as zip_store:
                root = zarr.open_group(zip_store, mode="w")

                # Create sample datasets
                temps = root.create_array("temperatures", shape=(365,), dtype="f4")
                temps[:] = (
                    20 + 10 * np.sin(np.arange(365) * 2 * np.pi / 365) + np.random.normal(0, 2, 365)
                )
                temps.attrs["units"] = "celsius"
                temps.attrs["description"] = "Daily temperature readings"

                metadata = root.create_group("metadata")
                info = metadata.create_array("info", shape=(1,), dtype="U50")
                info[0] = "Weather data from ZIP demo"

                # Snapshot the stored values while the write-mode store is still
                # open. `temps` is backed by `zip_store`, so reading it after the
                # `with` block exits would touch a closed store.
                temps_written = np.asarray(temps[:])

                print("✅ Created temperature data in ZIP file")
                print(
                    f" Temperature range: {temps_written.min():.1f}°C"
                    f" to {temps_written.max():.1f}°C"
                )

            # Step 2: Access via ZEP 8 URL syntax
            print("\nAccessing ZIP data via ZEP 8 URL")
            zip_url = f"file:{zip_path}|zip:"
            root_read = zarr.open_group(zip_url, mode="r")

            temps_read = root_read["temperatures"]
            info_read = root_read["metadata/info"]

            print(f"✅ Successfully read via URL: {zip_url}")
            print(f" Temperature units: {temps_read.attrs['units']}")
            print(f" Description: {temps_read.attrs['description']}")
            print(f" Metadata: {info_read[0]}")
            # Compare against the in-memory snapshot, not the closed writer.
            print(f" Data integrity: {np.array_equal(temps_written, temps_read[:])}")

    except ImportError as e:
        print(f"❌ Required dependencies missing: {e}")
    except Exception as e:
        # Demo boundary: report the failure instead of aborting the whole script.
        print(f"❌ Demo failed: {e}")
105+
106+
107+
def demo_adapter_registry() -> None:
    """Report which store adapters can be looked up by name.

    Queries ``get_store_adapter`` for a fixed set of built-in names and a
    fixed set of optional names, printing the adapter class name on success
    and a "not registered" / "not available" line when the lookup raises
    ``KeyError``.
    """
    print("\n=== Store Adapter Registry Demo ===")

    try:
        print("📋 Testing common store adapters:")

        # Adapters expected to be registered out of the box
        for name in ("memory", "file", "zip"):
            try:
                found = get_store_adapter(name)
            except KeyError:
                print(f"❌ {name}: Not registered")
            else:
                print(f"✅ {name}: {found.__name__}")

        # Adapters that are only present when extra packages are installed
        print("\n📋 Testing optional adapters:")

        for name in ("icechunk", "ic", "s3", "gcs"):
            try:
                found = get_store_adapter(name)
            except KeyError:
                print(f"i {name}: Not available (requires additional packages)")
            else:
                print(f"✅ {name}: {found.__name__}")

    except Exception as exc:
        print(f"❌ Demo failed: {exc}")
137+
138+
139+
def demo_url_parsing() -> None:
    """Classify sample URLs and print the parsed segments of ZEP 8 ones.

    Each sample is checked with ``is_zep8_url``; those recognised as ZEP 8
    URLs are passed to ``URLParser.parse`` and every resulting segment is
    printed with whichever of its ``scheme``, ``adapter`` and ``path``
    attributes are non-empty. Parse errors are reported per URL.
    """
    print("\n=== URL Parsing Demo ===")

    try:
        url_parser = URLParser()

        samples = (
            "memory:",
            "file:/tmp/data.zarr",
            "file:/tmp/data.zip|zip:",
            "s3://bucket/data.zip|zip:|zarr3:",
            "memory:|icechunk:branch:main",  # This would be rejected by icechunk adapter
            "/regular/file/path",  # Not a ZEP 8 URL
        )

        print("📝 Testing URL parsing:")

        for candidate in samples:
            recognized = is_zep8_url(candidate)
            print(f"\n URL: {candidate}")
            print(f" ZEP 8: {recognized}")

            # Plain paths are only classified, never parsed.
            if not recognized:
                continue

            try:
                pieces = url_parser.parse(candidate)
                print(f" Segments: {len(pieces)}")
                for index, piece in enumerate(pieces):
                    # Collect only the attributes that carry a value.
                    described = []
                    if piece.scheme:
                        described.append(f"scheme={piece.scheme}")
                    if piece.adapter:
                        described.append(f"adapter={piece.adapter}")
                    if piece.path:
                        described.append(f"path='{piece.path}'")
                    print(f" {index}: {', '.join(described)}")
            except Exception as parse_exc:
                print(f" Parse error: {parse_exc}")

    except Exception as exc:
        print(f"❌ Demo failed: {exc}")
177+
178+
179+
def demo_error_cases() -> None:
    """Demonstrate common error cases and their handling.

    Runs three checks and prints the outcome of each:

    1. Malformed URLs (leading, trailing or doubled pipe, empty string) are
       passed to ``zarr.open_group`` and are expected to raise.
    2. A chain that names an unknown adapter is expected to raise.
    3. Plain ``memory:`` and ``file:`` URLs are opened for writing and a small
       array is stored in each.

    Any unexpected exception is caught and reported rather than raised.
    """
    print("\n=== Error Handling Demo ===")

    try:
        print("🚫 Testing error cases:")

        # Test 1: Invalid URL format
        print("\n1. Invalid URL formats:")
        invalid_urls = [
            "|invalid:start",  # Starts with pipe
            "memory:|",  # Ends with pipe
            "memory:||zip:",  # Double pipe
            "",  # Empty URL
        ]

        for url in invalid_urls:
            try:
                zarr.open_group(url, mode="r")
                print(f"❌ Should have failed: {url}")
            except Exception as e:
                print(f"✅ Correctly rejected: {url} -> {type(e).__name__}")

        # Test 2: Unknown adapters
        print("\n2. Unknown adapters:")
        try:
            zarr.open_group("memory:|unknown_adapter:", mode="r")
            print("❌ Should have failed: unknown adapter")
        except Exception as e:
            print(f"✅ Correctly rejected unknown adapter -> {type(e).__name__}")

        # Test 3: Fallback behavior
        print("\n3. Fallback to regular stores:")
        # Use a context-managed temporary directory so the on-disk store is
        # removed when the demo finishes (mkdtemp() would leave it behind).
        with tempfile.TemporaryDirectory() as tmpdir:
            regular_urls = ["memory:", f"file:{tmpdir}/fallback.zarr"]

            for url in regular_urls:
                try:
                    root = zarr.open_group(url, mode="w")
                    arr = root.create_array("data", shape=(5,), dtype="i4")
                    arr[:] = [1, 2, 3, 4, 5]
                    print(f"✅ Fallback works: {url}")
                except Exception as e:
                    print(f"❌ Fallback failed: {url} -> {e}")

    except Exception as e:
        # Demo boundary: report the failure instead of aborting the whole script.
        print(f"❌ Demo failed: {e}")
225+
226+
227+
if __name__ == "__main__":
    # Header banner
    print("ZEP 8 URL Syntax Demo", "=" * 30, sep="\n")

    # Run every demo in order; each one reports its own failures.
    demo_basic_zep8()
    demo_zip_chaining()
    demo_adapter_registry()
    demo_url_parsing()
    demo_error_cases()

    # Footer banner and closing notes
    print("\n" + "=" * 30, "Demo completed!", sep="\n")
    print(
        "\nZEP 8 URL syntax enables flexible chaining of storage adapters.",
        "For adapter-specific examples (like Icechunk), see the respective",
        "package repositories and their test suites.",
        sep="\n",
    )

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,12 @@ ignore-words-list = "astroid"
428428

429429
[project.entry-points.pytest11]
430430
zarr = "zarr.testing"
431+
432+
[project.entry-points."zarr.stores"]
433+
file = "zarr.storage.builtin_adapters:FileSystemAdapter"
434+
memory = "zarr.storage.builtin_adapters:MemoryAdapter"
435+
https = "zarr.storage.builtin_adapters:HttpsAdapter"
436+
s3 = "zarr.storage.builtin_adapters:S3Adapter"
437+
gcs = "zarr.storage.builtin_adapters:GCSAdapter"
438+
gs = "zarr.storage.builtin_adapters:GSAdapter"
439+
zip = "zarr.storage._zip:ZipStoreAdapter"

0 commit comments

Comments
 (0)