Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/async_geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
[cogeo]: https://cogeo.org/
"""

from ._array import Array
from ._geotiff import GeoTIFF
from ._overview import Overview
from ._version import __version__

__all__ = ["GeoTIFF", "Overview", "__version__"]
__all__ = ["Array", "GeoTIFF", "Overview", "__version__"]
32 changes: 32 additions & 0 deletions src/async_geotiff/_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from affine import Affine
from numpy.typing import NDArray
from pyproj import CRS


@dataclass(frozen=True, kw_only=True, eq=False)
class Array:
"""An array representation of data from a GeoTIFF."""

data: NDArray
"""The raw byte data of the array."""

mask: NDArray | None
"""The mask array, if any."""

width: int
"""The width of the array in pixels."""

height: int
"""The height of the array in pixels."""

transform: Affine
"""The affine transform mapping pixel coordinates to geographic coordinates."""

crs: CRS
"""The coordinate reference system of the array."""
111 changes: 111 additions & 0 deletions src/async_geotiff/_fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from __future__ import annotations

import asyncio
from typing import TYPE_CHECKING

import numpy as np
from affine import Affine

from async_geotiff import Array

if TYPE_CHECKING:
from async_tiff import TIFF
from async_tiff import Array as AsyncTiffArray
from pyproj import CRS


async def fetch_tile( # noqa: PLR0913
*,
x: int,
y: int,
tiff: TIFF,
crs: CRS,
ifd_index: int,
mask_ifd_index: int | None,
transform: Affine,
tile_width: int,
tile_height: int,
) -> Array:
tile_fut = tiff.fetch_tile(x, y, ifd_index)

mask_data: AsyncTiffArray | None = None
if mask_ifd_index := mask_ifd_index:
mask_fut = tiff.fetch_tile(x, y, mask_ifd_index)
tile, mask = await asyncio.gather(tile_fut, mask_fut)
tile_data, mask_data = await asyncio.gather(tile.decode(), mask.decode())
else:
tile = await tile_fut
tile_data = await tile.decode()

tile_transform = transform * Affine.translation(
x * tile_width,
y * tile_height,
)

return Array(
data=np.asarray(tile_data),
mask=np.asarray(mask_data) if mask_data else None,
crs=crs,
transform=tile_transform,
width=tile_width,
height=tile_height,
)


async def fetch_tiles( # noqa: PLR0913, D417
*,
xs: list[int],
ys: list[int],
tiff: TIFF,
crs: CRS,
ifd_index: int,
mask_ifd_index: int | None,
transform: Affine,
tile_width: int,
tile_height: int,
) -> list[Array]:
"""Fetch multiple tiles from this overview.

Args:
xs: The x coordinates of the tiles.
ys: The y coordinates of the tiles.

"""
tiles_fut = tiff.fetch_tiles(xs, ys, ifd_index)

decoded_masks: list[AsyncTiffArray | None] = [None] * len(xs)
if mask_ifd_index := mask_ifd_index:
masks_fut = tiff.fetch_tiles(xs, ys, mask_ifd_index)
tiles, masks = await asyncio.gather(tiles_fut, masks_fut)

decoded_tile_futs = [tile.decode() for tile in tiles]
decoded_mask_futs = [mask.decode() for mask in masks]
decoded_tiles = await asyncio.gather(*decoded_tile_futs)
decoded_masks = await asyncio.gather(*decoded_mask_futs)
else:
tiles = await tiles_fut
decoded_tiles = await asyncio.gather(*[tile.decode() for tile in tiles])

arrays: list[Array] = []
for x, y, tile_data, mask_data in zip(
xs,
ys,
decoded_tiles,
decoded_masks,
strict=True,
):
tile_transform = transform * Affine.translation(
x * tile_width,
y * tile_height,
)
array = Array(
data=np.asarray(tile_data),
mask=np.asarray(mask_data) if mask_data else None,
crs=crs,
transform=tile_transform,
width=tile_width,
height=tile_height,
)
arrays.append(array)

return arrays
71 changes: 71 additions & 0 deletions src/async_geotiff/_geotiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from async_tiff.enums import PhotometricInterpretation

from async_geotiff._crs import crs_from_geo_keys
from async_geotiff._fetch import fetch_tile as _fetch_tile
from async_geotiff._fetch import fetch_tiles as _fetch_tiles
from async_geotiff._overview import Overview
from async_geotiff.enums import Compression, Interleaving

Expand All @@ -19,6 +21,8 @@
from async_tiff import GeoKeyDirectory, ImageFileDirectory, ObspecInput
from async_tiff.store import ObjectStore # type: ignore # noqa: PGH003

from async_geotiff import Array


@dataclass(frozen=True, init=False, kw_only=True, repr=False)
class GeoTIFF:
Expand All @@ -34,6 +38,10 @@ class GeoTIFF:
Some tags, like most geo tags, only exist on the primary IFD.
"""

_mask_ifd: ImageFileDirectory | None = None
"""The mask IFD of the GeoTIFF, if any.
"""

_gkd: GeoKeyDirectory = field(init=False)
"""The GeoKeyDirectory of the primary IFD.
"""
Expand Down Expand Up @@ -61,6 +69,12 @@ def __init__(self, tiff: TIFF) -> None:

# Skip the first IFD, since it's the primary image
ifd_idx = 1

# Check if the primary IFD has a mask
if len(tiff.ifds) >= 2 and is_mask_ifd(tiff.ifds[ifd_idx]): # noqa: PLR2004
object.__setattr__(self, "_mask_ifd", tiff.ifds[ifd_idx])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully remember the COG architecture, but I think at one point the Mask where stored after all the image IFD (at least when not using the COG driver)

Does async-geotiff read the ghost header https://gdal.org/en/stable/drivers/raster/cog.html#header-ghost-area ?

also might specify in the library that we don't support external overview/mask 🤷

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. I just assumed it would always be data/mask/overview/mask overview.

No, we don't currently read the ghost header. @geospatial-jeff suggested not to in developmentseed/async-tiff#7. I think we might want to allow injecting support for it. In theory that might be something we could inject.

I think in the future, supporting external overview/mask would be possible, but no, not right now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In 8f02864 (this PR) I implemented a cleaner association of data ifds to masks:

  • First create dicts mapping from (image height, image width): (ifd index, ifd):

     data_ifds: dict[tuple[int, int], tuple[int, ImageFileDirectory]] = {}
     mask_ifds: dict[tuple[int, int], tuple[int, ImageFileDirectory]] = {}
  • Then iterate over the data ifds from largest to smallest. If there's a mask IFD of the same height and width, then associate the mask IFD with that data IFD.

ifd_idx += 1

overviews: list[Overview] = []
while True:
try:
Expand Down Expand Up @@ -217,6 +231,53 @@ def dtypes(self) -> list[str]:
# https://github.com/developmentseed/async-geotiff/issues/20
raise NotImplementedError

async def fetch_tile(
self,
x: int,
y: int,
) -> Array:
"""Fetch a tile from the full-resolution image.

Args:
x: The x coordinate of the tile.
y: The y coordinate of the tile.

"""
return await _fetch_tile(
x=x,
y=y,
tiff=self._tiff,
crs=self.crs,
ifd_index=0,
mask_ifd_index=1 if self._mask_ifd else None,
transform=self.transform,
tile_width=self.tile_width,
tile_height=self.tile_height,
)

# TODO: relax type hints to Sequence[int]
# upstream issue:
# https://github.com/developmentseed/async-tiff/issues/198
async def fetch_tiles(self, xs: list[int], ys: list[int]) -> list[Array]:
"""Fetch multiple tiles from the full-resolution image.

Args:
xs: The x coordinates of the tiles.
ys: The y coordinates of the tiles.

"""
return await _fetch_tiles(
xs=xs,
ys=ys,
tiff=self._tiff,
crs=self.crs,
ifd_index=0,
mask_ifd_index=1 if self._mask_ifd else None,
transform=self.transform,
tile_width=self.tile_width,
tile_height=self.tile_height,
)

@property
def height(self) -> int:
"""The height (number of rows) of the full image."""
Expand Down Expand Up @@ -294,6 +355,16 @@ def shape(self) -> tuple[int, int]:
"""Get the shape (height, width) of the full image."""
return (self.height, self.width)

@property
def tile_height(self) -> int:
"""The height in pixels per tile of the image."""
return self._primary_ifd.tile_height or self.height

@property
def tile_width(self) -> int:
"""The width in pixels per tile of the image."""
return self._primary_ifd.tile_width or self.width

@cached_property
def transform(self) -> Affine:
"""Return the dataset's georeferencing transformation matrix.
Expand Down
64 changes: 63 additions & 1 deletion src/async_geotiff/_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@

from affine import Affine

from async_geotiff._fetch import fetch_tile as _fetch_tile
from async_geotiff._fetch import fetch_tiles as _fetch_tiles

if TYPE_CHECKING:
from async_tiff import GeoKeyDirectory, ImageFileDirectory

from async_geotiff import GeoTIFF
from async_geotiff import Array, GeoTIFF

# ruff: noqa: SLF001


@dataclass(init=False, frozen=True, kw_only=True, eq=False, repr=False)
Expand Down Expand Up @@ -55,11 +60,68 @@ def _create(

return instance

async def fetch_tile(
self,
x: int,
y: int,
) -> Array:
"""Fetch a tile from this overview.

Args:
x: The x coordinate of the tile.
y: The y coordinate of the tile.

"""
return await _fetch_tile(
x=x,
y=y,
tiff=self._geotiff._tiff,
crs=self._geotiff.crs,
ifd_index=self._ifd[0],
mask_ifd_index=self._mask_ifd[0] if self._mask_ifd else None,
transform=self.transform,
tile_width=self.tile_width,
tile_height=self.tile_height,
)

# TODO: relax type hints to Sequence[int]
# upstream issue:
# https://github.com/developmentseed/async-tiff/issues/198
async def fetch_tiles(self, xs: list[int], ys: list[int]) -> list[Array]:
"""Fetch multiple tiles from this overview.

Args:
xs: The x coordinates of the tiles.
ys: The y coordinates of the tiles.

"""
return await _fetch_tiles(
xs=xs,
ys=ys,
tiff=self._geotiff._tiff,
crs=self._geotiff.crs,
ifd_index=self._ifd[0],
mask_ifd_index=self._mask_ifd[0] if self._mask_ifd else None,
transform=self.transform,
tile_width=self.tile_width,
tile_height=self.tile_height,
)

@property
def height(self) -> int:
"""The height of the overview in pixels."""
return self._ifd[1].image_height

@property
def tile_height(self) -> int:
"""The height in pixels per tile of the overview."""
return self._ifd[1].tile_height or self.height

@property
def tile_width(self) -> int:
"""The width in pixels per tile of the overview."""
return self._ifd[1].tile_width or self.width

@cached_property
def transform(self) -> Affine:
"""The affine transform mapping pixel coordinates to geographic coordinates.
Expand Down
28 changes: 28 additions & 0 deletions tests/test_fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Test fetching tiles from a GeoTIFF."""

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import pytest
from rasterio.windows import Window

if TYPE_CHECKING:
from .conftest import LoadGeoTIFF, LoadRasterio


@pytest.mark.asyncio
async def test_fetch(load_geotiff: LoadGeoTIFF, load_rasterio: LoadRasterio) -> None:
name = "uint8_rgb_deflate_block64_cog"

geotiff = await load_geotiff(name)

tile = await geotiff.fetch_tile(0, 0)

window = Window(0, 0, geotiff.tile_width, geotiff.tile_height)
with load_rasterio(name) as rasterio_ds:
rasterio_data = rasterio_ds.read(window=window)

np.testing.assert_array_equal(tile.data, rasterio_data)
assert tile.crs == geotiff.crs
Loading