|
| 1 | +""" |
| 2 | +ZEP 8 URL Syntax Demo |
| 3 | +
|
| 4 | +This script demonstrates ZEP 8 URL syntax for chained store access in zarr-python. |
| 5 | +ZEP 8 URLs allow you to chain different storage adapters using pipe (|) syntax. |
| 6 | +
|
| 7 | +Examples of ZEP 8 URLs: |
| 8 | + - "memory:" - Simple in-memory store |
| 9 | + - "file:/path/data.zip|zip:" - ZIP file access |
| 10 | + - "s3://bucket/data.zip|zip:|zarr3:" - Cloud ZIP with zarr3 format |
| 11 | + - "file:/path/repo|icechunk:branch:main" - Icechunk repository (if available) |
| 12 | +
|
| 13 | +For comprehensive Icechunk integration examples, see the icechunk repository tests. |
| 14 | +""" |
| 15 | + |
| 16 | +import tempfile |
| 17 | +from pathlib import Path |
| 18 | + |
| 19 | +import numpy as np |
| 20 | + |
| 21 | +import zarr |
| 22 | +from zarr.core.url_syntax import URLParser, is_zep8_url |
| 23 | +from zarr.registry import get_store_adapter |
| 24 | +from zarr.storage import ZipStore |
| 25 | + |
| 26 | + |
def demo_basic_zep8() -> None:
    """Open groups through the two simplest ZEP 8 URLs and write to each.

    First a ``memory:`` URL, then a ``file:`` URL that points inside a
    temporary directory. Any exception raised along the way is caught and
    reported on stdout instead of being propagated.
    """
    print("=== Basic ZEP 8 URL Demo ===")

    try:
        print("📝 Testing basic ZEP 8 URL formats")

        # Part 1: group addressed by a bare "memory:" URL.
        print("\n1. Memory store:")
        memory_url = "memory:"
        mem_group = zarr.open_group(memory_url, mode="w")
        mem_array = mem_group.create_array("test_data", shape=(10,), dtype="f4")
        mem_array[:] = np.random.random(10)
        print(f"✅ Created array via {memory_url}")
        print(f" Data shape: {mem_array.shape}, dtype: {mem_array.dtype}")

        # Part 2: group addressed by a "file:" URL under a scratch directory
        # that is removed again when the ``with`` block exits.
        print("\n2. File store:")
        with tempfile.TemporaryDirectory() as scratch_dir:
            file_url = f"file:{scratch_dir}/test.zarr"
            disk_group = zarr.open_group(file_url, mode="w")
            disk_array = disk_group.create_array(
                "persistent_data", shape=(20,), dtype="i4"
            )
            disk_array[:] = range(20)
            print(f"✅ Created array via {file_url}")
            print(f" Data: {list(disk_array[:5])}... (first 5 elements)")

    except Exception as err:
        print(f"❌ Demo failed: {err}")
| 55 | + |
| 56 | + |
def demo_zip_chaining() -> None:
    """Write zarr data into a ZIP file, then read it back via a chained URL.

    Step 1 creates ``data.zip`` in a temporary directory through ``ZipStore``
    and fills it with a ``temperatures`` array and a ``metadata/info`` array.
    Step 2 reopens the same file read-only with the ZEP 8 URL
    ``file:<path>|zip:`` and prints the attributes and a round-trip check.

    Import errors and any other exception are caught and reported on stdout;
    nothing is raised to the caller.
    """
    print("\n=== ZIP Chaining Demo ===")

    try:
        print("📝 Creating ZIP file with zarr data, then accessing via ZEP 8 URL")

        with tempfile.TemporaryDirectory() as tmpdir:
            zip_path = Path(tmpdir) / "data.zip"

            # Step 1: Create ZIP file with zarr data
            print(f"Creating ZIP file at: {zip_path}")
            with ZipStore(str(zip_path), mode="w") as zip_store:
                root = zarr.open_group(zip_store, mode="w")

                # Create sample datasets
                temps = root.create_array("temperatures", shape=(365,), dtype="f4")
                temps[:] = (
                    20
                    + 10 * np.sin(np.arange(365) * 2 * np.pi / 365)
                    + np.random.normal(0, 2, 365)
                )
                temps.attrs["units"] = "celsius"
                temps.attrs["description"] = "Daily temperature readings"

                metadata = root.create_group("metadata")
                info = metadata.create_array("info", shape=(1,), dtype="U50")
                info[0] = "Weather data from ZIP demo"

                # Snapshot the stored values while the writer is still open.
                # The integrity check below runs after this store has been
                # closed, so it must not read through ``temps`` again.
                written = temps[:]

                print("✅ Created temperature data in ZIP file")
                print(f" Temperature range: {written.min():.1f}°C to {written.max():.1f}°C")

            # Step 2: Access via ZEP 8 URL syntax. This runs after the writer's
            # ``with`` block so the store is closed before the file is reopened.
            print("\nAccessing ZIP data via ZEP 8 URL")
            zip_url = f"file:{zip_path}|zip:"
            root_read = zarr.open_group(zip_url, mode="r")

            temps_read = root_read["temperatures"]
            info_read = root_read["metadata/info"]

            print(f"✅ Successfully read via URL: {zip_url}")
            print(f" Temperature units: {temps_read.attrs['units']}")
            print(f" Description: {temps_read.attrs['description']}")
            print(f" Metadata: {info_read[0]}")
            print(f" Data integrity: {np.array_equal(written, temps_read[:])}")

    except ImportError as e:
        print(f"❌ Required dependencies missing: {e}")
    except Exception as e:
        print(f"❌ Demo failed: {e}")
| 105 | + |
| 106 | + |
def demo_adapter_registry() -> None:
    """Look up a fixed list of adapter names and print which ones resolve.

    Each name is passed to ``get_store_adapter``. A ``KeyError`` is reported
    as "not registered" for the built-in names and as "not available" for the
    optional ones. Any other exception aborts the demo with a message on
    stdout.
    """
    print("\n=== Store Adapter Registry Demo ===")

    def _probe(name: str, missing_line: str) -> None:
        # Print the adapter class name, or ``missing_line`` on KeyError.
        try:
            found = get_store_adapter(name)
            print(f"✅ {name}: {found.__name__}")
        except KeyError:
            print(missing_line)

    try:
        print("📋 Testing common store adapters:")
        for name in ("memory", "file", "zip"):
            _probe(name, f"❌ {name}: Not registered")

        print("\n📋 Testing optional adapters:")
        for name in ("icechunk", "ic", "s3", "gcs"):
            _probe(name, f"i {name}: Not available (requires additional packages)")

    except Exception as err:
        print(f"❌ Demo failed: {err}")
| 137 | + |
| 138 | + |
def demo_url_parsing() -> None:
    """Classify sample URLs with ``is_zep8_url`` and print their segments.

    URLs recognised as ZEP 8 are handed to ``URLParser.parse``; for every
    returned segment the non-empty ``scheme``, ``adapter`` and ``path``
    fields are listed. A parse failure is printed for that URL only and the
    loop continues with the next one.
    """
    print("\n=== URL Parsing Demo ===")

    try:
        url_parser = URLParser()

        samples = (
            "memory:",
            "file:/tmp/data.zarr",
            "file:/tmp/data.zip|zip:",
            "s3://bucket/data.zip|zip:|zarr3:",
            "memory:|icechunk:branch:main",  # This would be rejected by icechunk adapter
            "/regular/file/path",  # Not a ZEP 8 URL
        )

        print("📝 Testing URL parsing:")

        for sample in samples:
            recognised = is_zep8_url(sample)
            print(f"\n URL: {sample}")
            print(f" ZEP 8: {recognised}")

            # Only URLs flagged as ZEP 8 are parsed further.
            if not recognised:
                continue

            try:
                parsed = url_parser.parse(sample)
                print(f" Segments: {len(parsed)}")
                for position, segment in enumerate(parsed):
                    # Collect only the fields that are set on this segment.
                    shown = []
                    if segment.scheme:
                        shown.append(f"scheme={segment.scheme}")
                    if segment.adapter:
                        shown.append(f"adapter={segment.adapter}")
                    if segment.path:
                        shown.append(f"path='{segment.path}'")
                    print(f" {position}: {', '.join(shown)}")
            except Exception as parse_err:
                print(f" Parse error: {parse_err}")

    except Exception as err:
        print(f"❌ Demo failed: {err}")
| 177 | + |
| 178 | + |
def demo_error_cases() -> None:
    """Show how malformed URLs and unknown adapters are handled.

    Runs three checks and prints one line per case:

    1. A list of malformed URLs is passed to ``zarr.open_group``; each is
       expected to raise.
    2. A URL naming an unknown adapter is expected to raise.
    3. Plain ``memory:`` and ``file:`` URLs are opened for writing and a
       small array is stored in each.

    The ``file:`` URL in check 3 lives in a ``tempfile.TemporaryDirectory``
    so the directory is removed when the check finishes. Any unexpected
    exception is caught and reported on stdout.
    """
    print("\n=== Error Handling Demo ===")

    try:
        print("🚫 Testing error cases:")

        # Test 1: Invalid URL format
        print("\n1. Invalid URL formats:")
        invalid_urls = [
            "|invalid:start",  # Starts with pipe
            "memory:|",  # Ends with pipe
            "memory:||zip:",  # Double pipe
            "",  # Empty URL
        ]

        for url in invalid_urls:
            try:
                zarr.open_group(url, mode="r")
                print(f"❌ Should have failed: {url}")
            except Exception as e:
                print(f"✅ Correctly rejected: {url} -> {type(e).__name__}")

        # Test 2: Unknown adapters
        print("\n2. Unknown adapters:")
        try:
            zarr.open_group("memory:|unknown_adapter:", mode="r")
            print("❌ Should have failed: unknown adapter")
        except Exception as e:
            print(f"✅ Correctly rejected unknown adapter -> {type(e).__name__}")

        # Test 3: Fallback behavior
        print("\n3. Fallback to regular stores:")
        # A context-managed directory is used here because a bare mkdtemp()
        # would leave the directory behind after the demo.
        with tempfile.TemporaryDirectory() as tmpdir:
            regular_urls = ["memory:", f"file:{tmpdir}/fallback.zarr"]

            for url in regular_urls:
                try:
                    root = zarr.open_group(url, mode="w")
                    arr = root.create_array("data", shape=(5,), dtype="i4")
                    arr[:] = [1, 2, 3, 4, 5]
                    print(f"✅ Fallback works: {url}")
                except Exception as e:
                    print(f"❌ Fallback failed: {url} -> {e}")

    except Exception as e:
        print(f"❌ Demo failed: {e}")
| 225 | + |
| 226 | + |
if __name__ == "__main__":
    # Script entry point: run every demo in order between two banner rules.
    banner_rule = "=" * 30

    print("ZEP 8 URL Syntax Demo")
    print(banner_rule)

    for run_demo in (
        demo_basic_zep8,
        demo_zip_chaining,
        demo_adapter_registry,
        demo_url_parsing,
        demo_error_cases,
    ):
        run_demo()

    print("\n" + banner_rule)
    print("Demo completed!")
    print("\nZEP 8 URL syntax enables flexible chaining of storage adapters.")
    print("For adapter-specific examples (like Icechunk), see the respective")
    print("package repositories and their test suites.")
0 commit comments