
Commit 743741d

Authored by TheGreatAlgo, Faolain, 0xSwego, and coderabbitai[bot]
feat: Sharding (#68)

* feat: support partials
* fix: re-add accidentally deleted _map_byte_request
* fix: tidying up
* fix: full coverage
* fix: re-order
* fix: update ruff and mypy
* fix: pre-commit
* deps: update kubo to latest in tests
* test: add test for ipfs gateway partials
* fix: sharding
* fix: converter
* fix: more work on sharding
* fix: pinning
* fix: pre-commit
* fix: tidying
* fix: target
* fix: fixing types
* fix: more type cleanups
* fix: remove unused
* fix: change imports
* fix: fix ruff
* fix: remove aiohttp
* fix: add tuple
* fix: ruff and version
* fix: remove aiohttp
* fix: remove -s
* fix: more changes
* fix: test era5
* Update test_sharded_zarr_store.py
* fix: logging
* fix: async pinning, chunker
* fix: dag cbor
* fix: remove print
* fix: sharding print and default
* fix: fix race condition
* fix: revert metadata logic
* fix: debug key
* fix: more debug
* fix: more debug
* fix: array shape
* fix: tests and race condition on caches
* fix: more locks
* fix: more tests
* fix: remove comment
* fix: tests
* fix: update test
* fix: add concat test
* fix: helpful test for local testing
* fix: kubo store httpx coverage
* fix: full coverage
* fix: fix mypy
* ci: update ipfs from 0.35 to 0.36
* ci: revert back to 0.35 from 0.36 to find out why "Error: IPFS API service unreachable"
* fix: change integer math
* fix: print debug
* fix: remove debug
* fix: reformat
* fix: print key
* fix: update tests
* fix: update formatting
* fix: update cids
* fix: more changes
* fix: remove duplicate
* fix: with read only
* Update py_hamt/store_httpx.py (Co-authored-by: coderabbitai[bot])
* fix: fix casing
* fix: linting
* lint(ruff): add stricter rule to __init__
* fix: lru cache
* fix: zarr coverage
* fix: final tests
* fix: small changes
* fix: small updates
* fix: remove duplicate

Co-authored-by: Faolain <[email protected]>
Co-authored-by: 0xSwego <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent de33efe, commit 743741d

26 files changed: +6505 / -54 lines

.pre-commit-config.yaml (1 addition, 1 deletion)

```diff
@@ -16,7 +16,7 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.11.11
+    rev: v0.12.12
     hooks:
       - id: ruff-check
       - id: ruff-format
```

CLAUDE.md (new file, 87 additions)

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Common Development Commands

Set up the environment:

```bash
uv sync
source .venv/bin/activate
pre-commit install
```

Run all checks (tests, linting, formatting, type checking):

```bash
bash run-checks.sh
```

Run tests:

```bash
# All tests (requires an IPFS daemon or Docker)
pytest --ipfs --cov=py_hamt tests/

# Quick tests without IPFS integration
pytest --cov=py_hamt tests/

# Single test file
pytest tests/test_hamt.py

# Coverage report
uv run coverage report --fail-under=100 --show-missing
```

Linting and formatting:

```bash
# Run all pre-commit hooks
uv run pre-commit run --all-files --show-diff-on-failure

# Fix auto-fixable ruff issues
uv run ruff check --fix
```

Type checking and other tools:

```bash
# Type checking is handled by pre-commit hooks (mypy)
# Documentation preview
uv run pdoc py_hamt
```

## Architecture Overview

py-hamt implements a Hash Array Mapped Trie (HAMT) for IPFS/IPLD content-addressed storage. The core architecture follows this pattern:

1. **ContentAddressedStore (CAS)** - Abstract storage layer (store.py)
   - `KuboCAS` - IPFS/Kubo implementation for production
   - `InMemoryCAS` - In-memory implementation for testing

2. **HAMT** - Core data structure (hamt.py)
   - Uses blake3 hashing by default
   - Implements a content-addressed trie for efficient key-value storage
   - Supports async operations for large datasets

3. **ZarrHAMTStore** - Zarr integration (zarr_hamt_store.py)
   - Implements the zarr.abc.store.Store interface
   - Enables storing large Zarr arrays on IPFS via HAMT
   - Keys are stored verbatim, values as raw bytes

4. **Encryption Layer** - Optional encryption (encryption_hamt_store.py)
   - `SimpleEncryptedZarrHAMTStore` for fully encrypted storage

## Key Design Patterns

- All storage operations are async to handle IPFS network calls
- Content addressing means identical data gets the same hash/CID
- HAMT provides O(log n) access time for large key sets
- Store abstractions allow swapping storage backends
- Type hints are required throughout (mypy enforced)
- 100% test coverage is required, with hypothesis property-based testing

## IPFS Integration Requirements

Tests run with one of:

- A local IPFS daemon running (`ipfs daemon`)
- Docker available for a containerized IPFS node
- Neither (unit tests only; integration tests are skipped)

The `--ipfs` pytest flag controls IPFS test execution.
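The design notes above lean on content addressing: identical data yields the same hash/CID, which is what makes deduplication and cache-friendly storage work. A minimal stdlib sketch of that property, using sha256 as a stand-in for the blake3 default the notes mention (`content_address` is a hypothetical helper, not py-hamt API):

```python
import hashlib


def content_address(data: bytes) -> str:
    # Content addressing: the identifier is derived from the bytes themselves,
    # so identical payloads always map to the same address.
    # (sha256 is a stand-in here; py-hamt defaults to blake3.)
    return hashlib.sha256(data).hexdigest()


a = content_address(b"chunk-0 payload")
b = content_address(b"chunk-0 payload")
c = content_address(b"chunk-1 payload")

print(a == b)  # True: identical data, identical address (deduplication)
print(a == c)  # False: different data, different address
```

This is why re-writing an unchanged Zarr chunk to a CAS is effectively free: the store sees an address it already holds.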

fsgs.py (1 addition, 1 deletion)

```diff
@@ -16,7 +16,7 @@
 async def main():
-    cid = "bafyr4iecw3faqyvj75psutabk2jxpddpjdokdy5b26jdnjjzpkzbgb5xoq"
+    cid = "bafyr4ibiduv7ml3jeyl3gn6cjcrcizfqss7j64rywpbj3whr7tc6xipt3y"

     # Use KuboCAS as an async context manager
     async with KuboCAS() as kubo_cas:  # connects to a local kubo node
```

public_gateway_example.py (1 addition, 1 deletion)

```diff
@@ -53,7 +53,7 @@ async def fetch_zarr_from_gateway(cid: str, gateway: str = "https://ipfs.io"):
 async def main():
     # Example CID - this points to a weather dataset stored on IPFS
-    cid = "bafyr4iecw3faqyvj75psutabk2jxpddpjdokdy5b26jdnjjzpkzbgb5xoq"
+    cid = "bafyr4ibiduv7ml3jeyl3gn6cjcrcizfqss7j64rywpbj3whr7tc6xipt3y"

     # Try different public gateways
     gateways = [
```

py_hamt/__init__.py (5 additions, 2 deletions)

```diff
@@ -1,5 +1,7 @@
 from .encryption_hamt_store import SimpleEncryptedZarrHAMTStore
 from .hamt import HAMT, blake3_hashfn
+from .hamt_to_sharded_converter import convert_hamt_to_sharded, sharded_converter_cli
+from .sharded_zarr_store import ShardedZarrStore
 from .store_httpx import ContentAddressedStore, InMemoryCAS, KuboCAS
 from .zarr_hamt_store import ZarrHAMTStore

@@ -11,6 +13,7 @@
     "KuboCAS",
     "ZarrHAMTStore",
     "SimpleEncryptedZarrHAMTStore",
+    "ShardedZarrStore",
+    "convert_hamt_to_sharded",
+    "sharded_converter_cli",
 ]
-
-print("Running py-hamt from source!")
```

py_hamt/hamt.py (11 additions, 2 deletions)

```diff
@@ -8,6 +8,7 @@
     Callable,
     Dict,
     Iterator,
+    Optional,
     cast,
 )

@@ -589,10 +590,18 @@ async def delete(self, key: str) -> None:
         # If we didn't make a change, then this key must not exist within the HAMT
         raise KeyError

-    async def get(self, key: str) -> IPLDKind:
+    async def get(
+        self,
+        key: str,
+        offset: Optional[int] = None,
+        length: Optional[int] = None,
+        suffix: Optional[int] = None,
+    ) -> IPLDKind:
         """Get a value."""
         pointer: IPLDKind = await self.get_pointer(key)
-        data: bytes = await self.cas.load(pointer)
+        data: bytes = await self.cas.load(
+            pointer, offset=offset, length=length, suffix=suffix
+        )
         if self.values_are_bytes:
             return data
         else:
```
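The new `offset`/`length`/`suffix` parameters on `HAMT.get` are forwarded straight to the CAS `load` call, enabling the partial (byte-range) reads this commit adds. The exact semantics live in the CAS layer, but a plausible pure-Python model, assuming HTTP Range-style behavior (`byte_range` is a hypothetical illustration, not py-hamt code), is:

```python
from typing import Optional


def byte_range(
    blob: bytes,
    offset: Optional[int] = None,
    length: Optional[int] = None,
    suffix: Optional[int] = None,
) -> bytes:
    # Model of HTTP Range-style slicing (assumption, not the library's code):
    #   suffix=N            -> last N bytes (like "Range: bytes=-N")
    #   offset=O            -> everything from byte O onward
    #   offset=O, length=L  -> L bytes starting at byte O
    if suffix is not None:
        return blob[-suffix:]
    if offset is not None:
        end = None if length is None else offset + length
        return blob[offset:end]
    return blob  # no range requested: full value


data = bytes(range(10))
print(byte_range(data, offset=2, length=3))  # b'\x02\x03\x04'
print(byte_range(data, suffix=2))            # b'\x08\t'
```

Fetching only the needed byte range of a large chunk, rather than the whole block, is what makes gateway-backed partial reads of sharded Zarr data economical.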
py_hamt/hamt_to_sharded_converter.py (new file, 130 additions)

```python
import argparse
import asyncio
import time

import xarray as xr
from multiformats import CID

from .hamt import HAMT
from .sharded_zarr_store import ShardedZarrStore
from .store_httpx import KuboCAS
from .zarr_hamt_store import ZarrHAMTStore


async def convert_hamt_to_sharded(
    cas: KuboCAS, hamt_root_cid: str, chunks_per_shard: int
) -> str:
    """
    Converts a Zarr dataset from a HAMT-based store to a ShardedZarrStore.

    Args:
        cas: An initialized ContentAddressedStore instance (KuboCAS).
        hamt_root_cid: The root CID of the source ZarrHAMTStore.
        chunks_per_shard: The number of chunks to group into a single shard
            in the new store.

    Returns:
        The root CID of the newly created ShardedZarrStore.
    """
    print(f"--- Starting Conversion from HAMT Root {hamt_root_cid} ---")
    start_time = time.perf_counter()

    # 1. Open the source HAMT store for reading
    print("Opening source HAMT store...")
    hamt_ro = await HAMT.build(
        cas=cas, root_node_id=hamt_root_cid, values_are_bytes=True, read_only=True
    )
    source_store = ZarrHAMTStore(hamt_ro, read_only=True)
    source_dataset = xr.open_zarr(store=source_store, consolidated=True)

    # 2. Introspect the source store's metadata to get the array shape
    #    and chunk shape
    print("Reading metadata from source store...")
    data_var_name = next(iter(source_dataset.data_vars))
    ordered_dims = list(source_dataset[data_var_name].dims)
    array_shape = tuple(source_dataset.sizes[dim] for dim in ordered_dims)
    chunk_shape = tuple(source_dataset.chunks[dim][0] for dim in ordered_dims)

    # 3. Create the destination ShardedZarrStore for writing
    print(
        f"Initializing new ShardedZarrStore with {chunks_per_shard} chunks per shard..."
    )
    dest_store = await ShardedZarrStore.open(
        cas=cas,
        read_only=False,
        array_shape=array_shape,
        chunk_shape=chunk_shape,
        chunks_per_shard=chunks_per_shard,
    )
    print("Destination store initialized.")

    # 4. Iterate and copy all data from source to destination
    print("Starting data migration...")
    count = 0
    async for key in hamt_ro.keys():
        count += 1
        # Read the pointer (CID) for this metadata or chunk key from the source
        cid: CID = await hamt_ro.get_pointer(key)
        cid_base32_str = str(cid.encode("base32"))

        # Write the exact same key-pointer pair to the destination
        await dest_store.set_pointer(key, cid_base32_str)
        if count % 200 == 0:  # pragma: no cover
            print(f"Migrated {count} keys...")  # pragma: no cover

    print(f"Migration of {count} total keys complete.")

    # 5. Finalize the new store by flushing it to the CAS
    print("Flushing new store to get final root CID...")
    new_root_cid = await dest_store.flush()
    end_time = time.perf_counter()

    print("\n--- Conversion Complete! ---")
    print(f"Total time: {end_time - start_time:.2f} seconds")
    print(f"New ShardedZarrStore Root CID: {new_root_cid}")
    return new_root_cid


async def sharded_converter_cli():
    parser = argparse.ArgumentParser(
        description="Convert a Zarr HAMT store to a Sharded Zarr store."
    )
    parser.add_argument(
        "hamt_cid", type=str, help="The root CID of the source Zarr HAMT store."
    )
    parser.add_argument(
        "--chunks-per-shard",
        type=int,
        default=6250,
        help="Number of chunk CIDs to store per shard in the new store.",
    )
    parser.add_argument(
        "--rpc-url",
        type=str,
        default="http://127.0.0.1:5001",
        help="The URL of the IPFS Kubo RPC API.",
    )
    parser.add_argument(
        "--gateway-url",
        type=str,
        default="http://127.0.0.1:8080",
        help="The URL of the IPFS Gateway.",
    )
    args = parser.parse_args()

    # Initialize the KuboCAS client with the provided RPC and Gateway URLs
    async with KuboCAS(
        rpc_base_url=args.rpc_url, gateway_base_url=args.gateway_url
    ) as cas_client:
        try:
            await convert_hamt_to_sharded(
                cas=cas_client,
                hamt_root_cid=args.hamt_cid,
                chunks_per_shard=args.chunks_per_shard,
            )
        except Exception as e:
            print(f"\nAn error occurred: {e}")


if __name__ == "__main__":
    asyncio.run(sharded_converter_cli())  # pragma: no cover
```
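The converter groups chunk CIDs into fixed-size shards (6250 per shard by default, per the CLI flag). Under the simple assumption that chunks are numbered linearly and packed into shards in order, locating a chunk is plain integer arithmetic; the sketch below illustrates that bookkeeping with hypothetical helpers (`chunk_count`, `locate`), not the actual ShardedZarrStore internals:

```python
import math


def chunk_count(array_shape, chunk_shape):
    # Total number of chunks: ceil-divide each dimension, then multiply.
    return math.prod(math.ceil(a / c) for a, c in zip(array_shape, chunk_shape))


def locate(chunk_index: int, chunks_per_shard: int):
    # Assumed layout: (shard number, position within that shard).
    return divmod(chunk_index, chunks_per_shard)


# A (1000, 10, 10) array in (10, 10, 10) chunks has 100 * 1 * 1 chunks.
n = chunk_count((1000, 10, 10), (10, 10, 10))
print(n)                   # 100
print(locate(0, 6250))     # (0, 0): the first chunk lives in shard 0
print(locate(6250, 6250))  # (1, 0): the first chunk of shard 1
```

Grouping pointers this way trades one HAMT lookup per chunk for one shard fetch covering thousands of chunk CIDs, which is the point of the conversion.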
