Skip to content

Commit 52da096

Browse files
authored
Merge pull request #122 from quantifyearth/mwd-sparse-geotiff
Add support for sparse geotiffs
2 parents f7c636d + 7a66e0f commit 52da096

File tree

5 files changed

+115
-4
lines changed

5 files changed

+115
-4
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### Added
44

55
* Added a `to_graphviz` call for expressions to help with debugging.
6+
* Support sparse GeoTIFF creation.
67

78
### Fixed
89

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ dev = [
5555
"mike",
5656
"mkdocs-gen-files",
5757
"black",
58+
"tifffile",
5859
]
5960

6061
[project.urls]

tests/unit/test_sparse_files.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import tempfile
2+
from pathlib import Path
3+
4+
import numpy as np
5+
import pytest
6+
import tifffile
7+
import yirgacheffe as yg
8+
9+
def test_sprase_needs_nodata() -> None:
10+
data = np.zeros((5000, 5000))
11+
with tempfile.TemporaryDirectory() as tempdir:
12+
filename = Path(tempdir) / "test.tif"
13+
with yg.from_array(data, (0, 0), yg.MapProjection("EPSG:4326", 10.0, -10.0)) as layer:
14+
with pytest.raises(ValueError):
15+
layer.to_geotiff(filename, sparse=True)
16+
17+
18+
@pytest.mark.parametrize("dtype,zero", [
19+
(np.int8, 0),
20+
(np.float32, 0.0),
21+
])
22+
def test_all_sparse(dtype: type, zero: int | float) -> None:
23+
data = np.zeros((5000, 5000)).astype(dtype) # type: ignore
24+
with tempfile.TemporaryDirectory() as tempdir:
25+
filename = Path(tempdir) / "test.tif"
26+
with yg.from_array(data, (0, 0), yg.MapProjection("EPSG:4326", 10.0, -10.0)) as layer:
27+
layer.to_geotiff(filename, nodata=zero, sparse=True)
28+
with yg.read_raster(filename) as layer:
29+
# no data is nan, so convert that to 0, as otherwise
30+
# this test will miss things
31+
assert layer.nan_to_num().sum() == 0
32+
33+
# now look into the tiff structure to see that all blocks are tiff
34+
with tifffile.TiffFile(filename) as tif:
35+
page = tif.pages[0] # type: ignore
36+
offsets = page.tags.get('StripOffsets').value # type: ignore
37+
byte_counts = page.tags.get('StripByteCounts').value # type: ignore
38+
39+
for offset, count in zip(offsets, byte_counts):
40+
assert offset == 0
41+
assert count == 0
42+
43+
44+
@pytest.mark.parametrize("dtype,zero", [
45+
(np.int8, 0),
46+
(np.float32, 0.0),
47+
])
48+
def test_none_sparse(dtype: type, zero: int | float) -> None:
49+
data = np.ones((5000, 5000)).astype(dtype) # type: ignore
50+
with tempfile.TemporaryDirectory() as tempdir:
51+
filename = Path(tempdir) / "test.tif"
52+
with yg.from_array(data, (0, 0), yg.MapProjection("EPSG:4326", 10.0, -10.0)) as layer:
53+
layer.to_geotiff(filename, nodata=zero, sparse=True)
54+
with yg.read_raster(filename) as layer:
55+
assert layer.sum() == 5000 * 5000
56+
57+
# now look into the tiff structure to see that all blocks are tiff
58+
with tifffile.TiffFile(filename) as tif:
59+
page = tif.pages[0] # type: ignore
60+
offsets = page.tags.get('StripOffsets').value # type: ignore
61+
byte_counts = page.tags.get('StripByteCounts').value # type: ignore
62+
63+
for offset, count in zip(offsets, byte_counts):
64+
assert offset != 0
65+
assert count != 0
66+
67+
68+
@pytest.mark.parametrize("dtype,zero", [
69+
(np.int8, 0),
70+
(np.float32, 0.0),
71+
])
72+
def test_mixed_sparse(dtype: type, zero: int | float) -> None:
73+
data = np.array([[i % 2] * 5000 for i in range(5000)]).astype(dtype) # type: ignore
74+
with tempfile.TemporaryDirectory() as tempdir:
75+
filename = Path(tempdir) / "test.tif"
76+
with yg.from_array(data, (0, 0), yg.MapProjection("EPSG:4326", 10.0, -10.0)) as layer:
77+
layer.to_geotiff(filename, nodata=zero, sparse=True)
78+
with yg.read_raster(filename) as layer:
79+
# no data is nan, so convert that to 0, as otherwise
80+
# this test will miss things
81+
assert layer.nan_to_num().sum() == 5000 * 5000 / 2
82+
83+
# now look into the tiff structure to see that all blocks are tiff
84+
with tifffile.TiffFile(filename) as tif:
85+
page = tif.pages[0] # type: ignore
86+
offsets = page.tags.get('StripOffsets').value # type: ignore
87+
byte_counts = page.tags.get('StripByteCounts').value # type: ignore
88+
89+
for i, (offset, count) in enumerate(zip(offsets, byte_counts)):
90+
if i % 2:
91+
assert offset != 0
92+
assert count != 0
93+
else:
94+
assert offset == 0
95+
assert count == 0

yirgacheffe/_operators/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ def to_geotiff(
312312
parallelism: int | bool | None = None,
313313
callback: Callable[[float], None] | None = None,
314314
nodata: float | int | None = None,
315+
sparse: bool = False,
315316
) -> float | None:
316317
"""Saves a calculation to a raster file, optionally also returning the sum of pixels.
317318
@@ -323,7 +324,8 @@ def to_geotiff(
323324
yirgacheffe will pick a sensible value.
324325
callback: If passed, this callback will be called periodically with a progress update for the saving,
325326
with a value between 0.0 and 1.0.
326-
nodata: Nominate a value to be stored as nodata in the result
327+
nodata: Nominate a value to be stored as nodata in the result.
328+
sparse: If True then save a sparse GeoTIFF as per GDAL's extension to the GeoTIFF standard.
327329
328330
Returns:
329331
Either returns None, or the sum of the pixels in the resulting raster if `and_sum` was specified.
@@ -334,6 +336,7 @@ def to_geotiff(
334336
parallelism=parallelism,
335337
callback=callback,
336338
nodata=nodata,
339+
sparse=sparse,
337340
)
338341

339342
def sum(self):
@@ -1328,8 +1331,12 @@ def to_geotiff(
13281331
parallelism: int | bool | None = None,
13291332
callback: Callable[[float], None] | None = None,
13301333
nodata: float | int | None = None,
1334+
sparse: bool = False,
13311335
) -> float | None:
13321336

1337+
if sparse and nodata is None:
1338+
raise ValueError("Nodata value must be provided for sparse GeoTIFFs")
1339+
13331340
# We want to write to a tempfile before we move the result into place, but we can't use
13341341
# the actual $TMPDIR as that might be on a different device, and so we use a file next to where
13351342
# the final file will be, so we just need to rename the file at the end, not move it. But for special cases,
@@ -1351,7 +1358,8 @@ def to_geotiff(
13511358
with RasterLayer.empty_raster_layer_like(
13521359
self,
13531360
filename=inner_filename, # type: ignore
1354-
nodata=nodata
1361+
nodata=nodata,
1362+
sparse=sparse,
13551363
) as layer:
13561364
if parallelism is None:
13571365
result = self.save(layer, and_sum=and_sum, callback=callback)

yirgacheffe/layers/rasters.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def empty_raster_layer(
3333
nodata: float | int | None = None,
3434
nbits: int | None = None,
3535
threads: int | None = None,
36-
bands: int=1
36+
bands: int=1,
37+
sparse: bool=False,
3738
) -> RasterLayer:
3839
abs_xstep, abs_ystep = abs(scale.xstep), abs(scale.ystep)
3940

@@ -48,6 +49,8 @@ def empty_raster_layer(
4849
options.append(f"NUM_THREADS={threads}")
4950
if nbits is not None:
5051
options.append(f"NBITS={nbits}")
52+
if sparse:
53+
options.append("SPARSE_OK=YES")
5154

5255
map_projection = MapProjection(projection, scale.xstep, scale.ystep)
5356
# If the area is projected, use that, otherwise we need to project it
@@ -100,7 +103,8 @@ def empty_raster_layer_like(
100103
nodata: float | int | None = None,
101104
nbits: int | None = None,
102105
threads: int | None = None,
103-
bands: int=1
106+
bands: int=1,
107+
sparse: bool=False,
104108
) -> RasterLayer:
105109
if area is None:
106110
area = layer.area
@@ -135,6 +139,8 @@ def empty_raster_layer_like(
135139
options.append(f"NUM_THREADS={threads}")
136140
if nbits is not None:
137141
options.append(f"NBITS={nbits}")
142+
if sparse:
143+
options.append("SPARSE_OK=YES")
138144

139145
if filename:
140146
driver = gdal.GetDriverByName('GTiff')

0 commit comments

Comments
 (0)