Skip to content

Commit 4f26883

Browse files
authored
feat: Add script to generate npy files from saved tiffs (#30)
1 parent 1e86de1 commit 4f26883

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed

.gitignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
*.npy
2+
13
# Byte-compiled / optimized / DLL files
24
__pycache__/
35
*.py[codz]
@@ -182,9 +184,9 @@ cython_debug/
182184
.abstra/
183185

184186
# Visual Studio Code
185-
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
187+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186188
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187-
# and can be added to the global gitignore or merged into this file. However, if you prefer,
189+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
188190
# you could uncomment the following to ignore the entire vscode folder
189191
# .vscode/
190192

pixi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ version = "0.1.0"
99
generate = "python -m rasterio_generated.cli"
1010
info = "bash scripts/info.sh"
1111
check = "bash scripts/check.sh"
12+
generate-npy = "python scripts/generate_npy.py"
1213

1314
[dependencies]
1415
gdal = ">=3.12.1,<4"

scripts/generate_npy.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Generate .npy tile files alongside each TIFF in the repository.
2+
3+
This is intended to be used for downstream libraries to have "known" valid data to test
4+
against, without needing to read TIFF files directly. I.e. in Python we can easily test
5+
against rasterio at runtime, but in something like JavaScript, there's no canonical
6+
known-good TIFF reader. So having .npy files (which are pretty easy to parse in any
7+
language) allows us to test against a known reference implementation.
8+
9+
For each TIFF file, this script reads every internal tile (block) at each IFD
10+
level (full resolution + overviews) and saves them as NumPy .npy files.
11+
12+
Output structure:
13+
<tif_stem>/<z>-<x>-<y>.npy
14+
15+
where z=0 is full resolution, z=1 is the first overview, etc.
16+
x is the column index and y is the row index of the tile.
17+
18+
The .npy array has shape (bands, height, width).
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import math
24+
from pathlib import Path
25+
26+
import numpy as np
27+
import rasterio
28+
import rasterio.windows
29+
30+
31+
def generate_npy_tiles(tif_path: Path) -> None:
    """Dump every internal tile of one TIFF to ``.npy`` files.

    Tiles are written into a directory named after the TIFF's stem, located
    next to the TIFF and created if it does not already exist. The dataset as
    opened normally is written with ``z=0``; each overview reported for band 1
    is reopened through the ``OVERVIEW_LEVEL`` open option and written with
    ``z`` equal to its index plus one.
    """
    tiles_dir = tif_path.parent / tif_path.stem
    tiles_dir.mkdir(exist_ok=True)

    with rasterio.open(tif_path) as full_res:
        _write_tiles_for_ifd(full_res, tiles_dir, z=0)

        # Band 1's overview list decides how many reduced levels are visited.
        overview_count = len(full_res.overviews(1))
        for level in range(overview_count):
            with rasterio.open(tif_path, OVERVIEW_LEVEL=level) as overview:
                _write_tiles_for_ifd(overview, tiles_dir, z=level + 1)
42+
43+
44+
def _write_tiles_for_ifd(
    dataset: rasterio.DatasetReader,
    output_dir: Path,
    z: int,
) -> None:
    """Save each block of one opened dataset as ``<z>-<x>-<y>.npy``.

    The block grid is derived from the first band's block shape and the
    dataset's width and height. ``x`` is the tile's column index and ``y`` its
    row index. Every tile is read with a full block-sized window using
    ``boundless=True``, so windows that extend past the right or bottom edge
    are still requested at full block size.
    """
    # Only the first band's block shape is consulted; the other bands are
    # assumed to share it.
    tile_height, tile_width = dataset.block_shapes[0]

    tiles_across = math.ceil(dataset.width / tile_width)
    tiles_down = math.ceil(dataset.height / tile_height)

    for tile_y in range(tiles_down):
        top = tile_y * tile_height
        for tile_x in range(tiles_across):
            left = tile_x * tile_width
            tile_window = rasterio.windows.Window(
                col_off=left,
                row_off=top,
                width=tile_width,
                height=tile_height,
            )
            tile = dataset.read(window=tile_window, boundless=True)
            np.save(output_dir / f"{z}-{tile_x}-{tile_y}.npy", tile)
70+
71+
72+
def main() -> None:
    """Generate .npy tiles for every ``*.tif`` file found under the repo root.

    The repo root is taken to be the parent of the directory containing this
    script. Files are processed in sorted path order, and a short progress
    report is printed for each one.

    TIFFs located beneath a hidden directory (any directory component starting
    with ``.``, e.g. ``.git`` or ``.pixi``) are skipped: a recursive glob would
    otherwise descend into tool-managed directories and write output next to
    files that are not part of the repository's own data.
    """
    repo_root = Path(__file__).resolve().parent.parent

    tif_paths = sorted(
        path
        for path in repo_root.rglob("*.tif")
        # Inspect directory components only, so a TIFF whose own file name
        # starts with "." is still processed.
        if not any(
            part.startswith(".")
            for part in path.relative_to(repo_root).parts[:-1]
        )
    )

    if not tif_paths:
        print("No .tif files found!")
        return

    print(f"Found {len(tif_paths)} TIFF file(s)\n")

    for tif_path in tif_paths:
        rel = tif_path.relative_to(repo_root)
        print(f"Processing {rel}...")
        generate_npy_tiles(tif_path)
        # Same directory that generate_npy_tiles writes into.
        output_dir = tif_path.parent / tif_path.stem
        npy_count = len(list(output_dir.glob("*.npy")))
        print(f"  → {npy_count} tile(s) in {output_dir.relative_to(repo_root)}/")

    print("\nDone!")
92+
93+
94+
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)