Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 14 additions & 15 deletions src/virtual_tiff/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def _add_dim_for_samples_per_pixel(

def _construct_chunk_manifest(
*,
path: str,
url: str,
shape: tuple[int, ...],
chunks: tuple[int, ...],
offsets: Iterable[int],
Expand All @@ -189,9 +189,9 @@ def _construct_chunk_manifest(
)
offsets = offsets.reshape(chunk_manifest_shape)
byte_counts = byte_counts.reshape(chunk_manifest_shape)
paths = np.full_like(offsets, path, dtype=np.dtypes.StringDType)
urls = np.full_like(offsets, url, dtype=np.dtypes.StringDType)
return ChunkManifest.from_arrays(
paths=paths,
paths=urls,
offsets=offsets,
lengths=byte_counts,
)
Expand All @@ -202,7 +202,7 @@ async def _open_tiff(*, path: str, store: ObjectStore) -> TIFF:


def _construct_manifest_array(
*, ifd: ImageFileDirectory, path: str, endian: str
*, ifd: ImageFileDirectory, url: str, endian: str
) -> ManifestArray:
if ifd.other_tags.get(330):
raise NotImplementedError("TIFFs with Sub-IFDs are not yet supported.")
Expand Down Expand Up @@ -231,7 +231,7 @@ def _construct_manifest_array(
)
dimension_names = ("band",) + dimension_names
chunk_manifest = _construct_chunk_manifest(
path=path, shape=shape, chunks=chunks, offsets=offsets, byte_counts=byte_counts
url=url, shape=shape, chunks=chunks, offsets=offsets, byte_counts=byte_counts
)
codecs = _get_codecs(ifd, shape=shape, chunks=chunks, dtype=dtype, endian=endian)
attributes = _get_attributes(ifd)
Expand All @@ -258,6 +258,7 @@ def _construct_manifest_array(


def _construct_manifest_group(
url: str,
store: ObjectStore,
path: str,
*,
Expand All @@ -278,11 +279,10 @@ def _construct_manifest_group(
ManifestGroup containing the processed TIFF data
"""
# TODO: Make an async approach
urlpath = urlparse(path).path
tiff = sync(_open_tiff(store=store, path=urlpath))
tiff = sync(_open_tiff(store=store, path=path))

# Build manifest arrays from selected IFDs
manifest_arrays = _build_manifest_arrays(tiff, path, endian, ifd)
manifest_arrays = _build_manifest_arrays(tiff, url, endian, ifd)

# Organize into appropriate group structure
attrs: dict[str, Any] = {}
Expand All @@ -298,7 +298,7 @@ def _construct_manifest_group(

def _build_manifest_arrays(
tiff: TIFF,
path: str,
url: str,
endian: str,
ifd_index: int | None,
) -> dict[str, ManifestArray]:
Expand All @@ -318,13 +318,13 @@ def _build_manifest_arrays(
if ifd_index is not None:
# Process single specified IFD
manifest_arrays[str(ifd_index)] = _construct_manifest_array(
ifd=tiff.ifds[ifd_index], path=path, endian=endian
ifd=tiff.ifds[ifd_index], url=url, endian=endian
)
else:
# Process all IFDs
for idx, ifd in enumerate(tiff.ifds):
manifest_arrays[str(idx)] = _construct_manifest_array(
ifd=ifd, path=path, endian=endian
ifd=ifd, url=url, endian=endian
)

return manifest_arrays
Expand Down Expand Up @@ -386,15 +386,14 @@ def __call__(self, url: str, registry: ObjectStoreRegistry) -> ManifestStore:
Returns:
ms : ManifestStore containing ChunkManifests and Array metadata for the specified IFDs, along with an ObjectStore instance for loading any data.
"""
parsed = urlparse(url)
urlpath = parsed.path
store, path_in_store = registry.resolve(url)
endian = ENDIAN[store.get_range(urlpath, start=0, end=2).to_bytes()]
endian = ENDIAN[store.get_range(path_in_store, start=0, end=2).to_bytes()]
async_tiff_store = convert_obstore_to_async_tiff_store(store)
# Create a group containing dataset level metadata and all the manifest arrays
manifest_group = _construct_manifest_group(
url,
store=async_tiff_store,
path=url,
path=path_in_store,
ifd=self._ifd,
endian=endian,
ifd_layout=self.ifd_layout,
Expand Down
10 changes: 10 additions & 0 deletions tests/test_virtual_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,13 @@ def test_virtual_dataset_from_tiff(filename):
ds = ms.to_virtual_dataset()
assert isinstance(ds, xr.Dataset)
# TODO: Add more property tests


def test_local_store_with_prefix():
    """Open a TIFF via a ``file://`` URL when the LocalStore is rooted at a prefix.

    The ``LocalStore`` is constructed on the fixture directory, while the URL
    handed to the parser is built from the absolute path of a file inside that
    directory. The test passes when the resulting manifest store converts to
    an ``xr.Dataset``.
    """
    # NOTE(review): assumes resolve_folder returns a pathlib-style object
    # (it is used with `.absolute()` and the `/` operator) — confirm.
    prefix = resolve_folder('tests/dvc/github').absolute()
    tiff_path = prefix / "chirps-v2.0.2025.01.tif"

    # Only the first IFD is requested.
    tiff_parser = VirtualTIFF(ifd=0)
    store_registry = ObjectStoreRegistry({"file://": LocalStore(prefix)})

    manifest_store = tiff_parser(f"file://{tiff_path}", registry=store_registry)
    dataset = manifest_store.to_virtual_dataset()
    assert isinstance(dataset, xr.Dataset)