diff --git a/src/virtual_tiff/parser.py b/src/virtual_tiff/parser.py index b3988c0..7e62ef7 100644 --- a/src/virtual_tiff/parser.py +++ b/src/virtual_tiff/parser.py @@ -173,7 +173,7 @@ def _add_dim_for_samples_per_pixel( def _construct_chunk_manifest( *, - path: str, + url: str, shape: tuple[int, ...], chunks: tuple[int, ...], offsets: Iterable[int], @@ -189,9 +189,9 @@ def _construct_chunk_manifest( ) offsets = offsets.reshape(chunk_manifest_shape) byte_counts = byte_counts.reshape(chunk_manifest_shape) - paths = np.full_like(offsets, path, dtype=np.dtypes.StringDType) + urls = np.full_like(offsets, url, dtype=np.dtypes.StringDType) return ChunkManifest.from_arrays( - paths=paths, + paths=urls, offsets=offsets, lengths=byte_counts, ) @@ -202,7 +202,7 @@ async def _open_tiff(*, path: str, store: ObjectStore) -> TIFF: def _construct_manifest_array( - *, ifd: ImageFileDirectory, path: str, endian: str + *, ifd: ImageFileDirectory, url: str, endian: str ) -> ManifestArray: if ifd.other_tags.get(330): raise NotImplementedError("TIFFs with Sub-IFDs are not yet supported.") @@ -231,7 +231,7 @@ def _construct_manifest_array( ) dimension_names = ("band",) + dimension_names chunk_manifest = _construct_chunk_manifest( - path=path, shape=shape, chunks=chunks, offsets=offsets, byte_counts=byte_counts + url=url, shape=shape, chunks=chunks, offsets=offsets, byte_counts=byte_counts ) codecs = _get_codecs(ifd, shape=shape, chunks=chunks, dtype=dtype, endian=endian) attributes = _get_attributes(ifd) @@ -258,6 +258,7 @@ def _construct_manifest_array( def _construct_manifest_group( + url: str, store: ObjectStore, path: str, *, @@ -278,11 +279,10 @@ def _construct_manifest_group( ManifestGroup containing the processed TIFF data """ # TODO: Make an async approach - urlpath = urlparse(path).path - tiff = sync(_open_tiff(store=store, path=urlpath)) + tiff = sync(_open_tiff(store=store, path=path)) # Build manifest arrays from selected IFDs - manifest_arrays = _build_manifest_arrays(tiff, path, endian, ifd) + manifest_arrays = _build_manifest_arrays(tiff, url, endian, ifd) # Organize into appropriate group structure attrs: dict[str, Any] = {} @@ -298,7 +298,7 @@ def _construct_manifest_group( def _build_manifest_arrays( tiff: TIFF, - path: str, + url: str, endian: str, ifd_index: int | None, ) -> dict[str, ManifestArray]: @@ -318,13 +318,13 @@ def _build_manifest_arrays( if ifd_index is not None: # Process single specified IFD manifest_arrays[str(ifd_index)] = _construct_manifest_array( - ifd=tiff.ifds[ifd_index], path=path, endian=endian + ifd=tiff.ifds[ifd_index], url=url, endian=endian ) else: # Process all IFDs for idx, ifd in enumerate(tiff.ifds): manifest_arrays[str(idx)] = _construct_manifest_array( - ifd=ifd, path=path, endian=endian + ifd=ifd, url=url, endian=endian ) return manifest_arrays @@ -386,15 +386,14 @@ def __call__(self, url: str, registry: ObjectStoreRegistry) -> ManifestStore: Returns: ms : ManifestStore containing ChunkManifests and Array metadata for the specified IFDs, along with an ObjectStore instance for loading any data. """ - parsed = urlparse(url) - urlpath = parsed.path store, path_in_store = registry.resolve(url) - endian = ENDIAN[store.get_range(urlpath, start=0, end=2).to_bytes()] + endian = ENDIAN[store.get_range(path_in_store, start=0, end=2).to_bytes()] async_tiff_store = convert_obstore_to_async_tiff_store(store) # Create a group containing dataset level metadata and all the manifest arrays manifest_group = _construct_manifest_group( + url, store=async_tiff_store, - path=url, + path=path_in_store, ifd=self._ifd, endian=endian, ifd_layout=self.ifd_layout, diff --git a/tests/test_virtual_tiff.py b/tests/test_virtual_tiff.py index 50c844b..7987a76 100644 --- a/tests/test_virtual_tiff.py +++ b/tests/test_virtual_tiff.py @@ -56,3 +56,13 @@ def test_virtual_dataset_from_tiff(filename): ds = ms.to_virtual_dataset() assert isinstance(ds, xr.Dataset) # TODO: Add more property tests + + +def test_local_store_with_prefix(): + data_dir = resolve_folder('tests/dvc/github').absolute() + filepath = data_dir / "test_reference.tif" + parser = VirtualTIFF(ifd=0) + registry = ObjectStoreRegistry({"file://": LocalStore(data_dir)}) + ms = parser(f"file://{filepath}", registry=registry) + ds = ms.to_virtual_dataset() + assert isinstance(ds, xr.Dataset)