Skip to content

Commit 1637510

Browse files
authored
Release fixes (#8)
* chip output file fix * fix normaliser docstring * comment indent * no data val metadata * use band cnt metadata * nodata metadata usage * re-write with compression * fix order for test * warnings on init * format file * format warning * tqdm dependency * tqdm progress * set dirs as Path * fix inaccurate * needs python >=3.9 * new action versions * remove stale * doc typo * type hint fix * warnings used * extra and fixed tests * fix version * fix python supported
1 parent a1470b5 commit 1637510

File tree

8 files changed

+139
-54
lines changed

8 files changed

+139
-54
lines changed

.github/workflows/main.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ jobs:
1414

1515
steps:
1616
- name: Check out repository
17-
uses: actions/checkout@v3
17+
uses: actions/checkout@v4
1818

1919
- name: Set up Python
20-
uses: actions/setup-python@v4
20+
uses: actions/setup-python@v5
2121
with:
2222
python-version: 3.12
2323

@@ -43,6 +43,6 @@ jobs:
4343
pytest --cov --cov-report=xml
4444

4545
- name: Upload coverage to Codecov
46-
uses: codecov/codecov-action@v3
46+
uses: codecov/codecov-action@v4
4747
with:
4848
token: ${{ secrets.CODECOV_TOKEN }}

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ Using the parameter `use_multiprocessing=True` (default) makes chipping process
6161
The `SegmentationMask` class is used to create segmentation mask images from a geopackage or shapefile, using an input image as the extent and pixel-size reference.
6262

6363
Once the segmentation mask has been created, the segmentation image can also be split into tiles. Some deep learning
64-
frameworks expect images and corresponding masks to have the same file name in separate directories. The `output_name`
65-
argument of ImageChip can ensure this is the case.
64+
frameworks expect images and corresponding masks to have the same file name in separate directories. The `output_name` argument of ImageChip can ensure this is the case.
6665

6766
```python
6867
from rschip import SegmentationMask, ImageChip
@@ -86,7 +85,6 @@ image_chipper = ImageChip(
8685
pixel_dimensions=128,
8786
offset=64,
8887
output_format="tif",
89-
standard_scale=False,
9088
)
9189
image_chipper.chip_image()
9290
```

pyproject.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "rschip"
7-
version = "0.3.1"
7+
version = "0.3.2"
88
description = "Prepare satellite images and training data for use with deep learning models"
99
readme = { file = "README.md", content-type = "text/markdown" }
1010
license = { text = "MIT" }
@@ -18,9 +18,6 @@ classifiers = [
1818
"Intended Audience :: Science/Research",
1919
"License :: OSI Approved :: MIT License",
2020
"Programming Language :: Python :: 3",
21-
"Programming Language :: Python :: 3.6",
22-
"Programming Language :: Python :: 3.7",
23-
"Programming Language :: Python :: 3.8",
2421
"Programming Language :: Python :: 3.9",
2522
"Programming Language :: Python :: 3.10",
2623
"Programming Language :: Python :: 3.11",
@@ -34,8 +31,9 @@ dependencies = [
3431
"numpy",
3532
"geopandas",
3633
"shapely",
34+
"tqdm",
3735
]
38-
requires-python = ">=3.6"
36+
requires-python = ">=3.9"
3937

4038
[project.optional-dependencies]
4139
test = [

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
numpy
22
rasterio
33
geopandas
4+
tqdm
45
pytest
56
pytest-cov
67
codecov

src/rschip/image_chip.py

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from pathlib import Path
2+
import warnings
23
import rasterio as rio
34
from rasterio.windows import Window
45
import numpy as np
56
import pickle
67
import multiprocessing
78
import time
9+
from tqdm import tqdm
810

911

1012
class ImageChip:
@@ -36,6 +38,7 @@ def __init__(
3638
output_format="tif",
3739
max_batch_size=1000,
3840
):
41+
3942
self.input_image_path = Path(input_image_path)
4043
self.output_path = Path(output_path) if output_path else Path(input_image_path)
4144
self.output_name = output_name if output_name else Path(input_image_path).stem
@@ -46,6 +49,41 @@ def __init__(
4649
self.use_multiprocessing = use_multiprocessing
4750
self.output_format = output_format
4851
self.max_batch_size = max_batch_size
52+
if not self.input_image_path.exists():
53+
raise FileNotFoundError(f"Input image not found: {self.input_image_path}")
54+
self._read_image_metadata()
55+
if self.pixel_dimensions <= 0:
56+
raise ValueError("pixel_dimensions must be a positive integer")
57+
if self.offset <= 0:
58+
raise ValueError("offset must be a positive integer")
59+
if self.output_format not in ("tif", "npz"):
60+
raise ValueError(
61+
f"output_format must be 'tif' or 'npz', got '{self.output_format}'"
62+
)
63+
64+
def _read_image_metadata(self) -> None:
65+
"""
66+
Read image profile metadata on initialisation.
67+
68+
Sets self.nodata_val from the image nodata property. Set to 0 with a warning
69+
if no value set.
70+
Sets self._band_count for use in band validation.
71+
"""
72+
with rio.open(self.input_image_path) as src:
73+
self._band_count = src.count
74+
nodata = src.nodata
75+
76+
if nodata is not None:
77+
self.nodata_val = nodata
78+
else:
79+
self.nodata_val = 0
80+
warnings.warn(
81+
f"No nodata value found in {self.input_image_path.name}. "
82+
"Defaulting to 0. If 0 is a valid pixel value in your image, "
83+
"scaling and normalisation will incorrectly treat those pixels as nodata.",
84+
UserWarning,
85+
stacklevel=2,
86+
)
4987

5088
def _generate_windows(self, src):
5189
"""
@@ -91,7 +129,7 @@ def _save_chip(self, chip, transform, output_file_path, d_type, src) -> None:
91129
dtype=d_type,
92130
crs=src.crs,
93131
transform=transform,
94-
nodata=None,
132+
nodata=self.nodata_val,
95133
) as dst:
96134
dst.write(chip)
97135

@@ -122,11 +160,9 @@ def _output_file(self, x: int, y: int) -> Path:
122160
Returns:
123161
Path: The full path (as a `Path` object) where the chip will be saved, including the generated file name.
124162
"""
125-
if self.output_name is None:
126-
output_file_name = f"{self.input_image_path.stem}_{x}_{y}.tif"
127-
else:
128-
output_name = self.output_name.replace(".tif", "")
129-
output_file_name = f"{output_name}_{x}_{y}.tif"
163+
164+
output_name = self.output_name.replace(".tif", "")
165+
output_file_name = f"{output_name}_{x}_{y}.tif"
130166
return self.output_path / output_file_name
131167

132168
def set_scaler(self, sample_size=10000, write_file=True, write_path=None):
@@ -147,7 +183,7 @@ def set_scaler(self, sample_size=10000, write_file=True, write_path=None):
147183
<image_file>_scaler_<sample_size>.pkl.
148184
"""
149185
if self.normaliser is not None:
150-
print("normaliser will be set to None")
186+
warnings.warn("normaliser will be set to None")
151187
self.normaliser = None
152188
self.standard_scaler = self.sample_to_scaler(sample_size=sample_size)
153189
if write_file:
@@ -176,8 +212,7 @@ def _validate_normaliser_inputs(self, value, name):
176212
Raises:
177213
ValueError: If value is not valid.
178214
"""
179-
with rio.open(self.input_image_path) as f:
180-
bands = f.profile["count"]
215+
bands = self._band_count
181216
if isinstance(value, list):
182217
if len(value) != bands:
183218
raise ValueError(
@@ -212,7 +247,7 @@ def set_normaliser(
212247
write_file (bool): If True a pickle file is written containing the normaliser dictionary.
213248
write_path (string): The directory and filename (.pkl) where the scaler will be written if `write_file`.
214249
None by default when the file path is written to the same dir as ImageChip.input_image_path and file name
215-
<image_file>_scaler_<sample_size>.pkl.
250+
<image_file>_normaliser.pkl.
216251

217252
"""
218253
if min_val is None or max_val is None:
@@ -227,7 +262,7 @@ def set_normaliser(
227262
max_val = self._validate_normaliser_inputs(max_val, "max_val")
228263

229264
if self.standard_scaler is not None:
230-
print("standard_scaler will be set to None")
265+
warnings.warn("standard_scaler will be set to None")
231266
self.standard_scaler = None
232267

233268
self.normaliser = {"min_val": min_val, "max_val": max_val}
@@ -243,13 +278,12 @@ def set_normaliser(
243278
pickle_file_path = output_dir / pickle_file_name
244279
else:
245280
pickle_file_path = write_path
246-
# Save the dictionary to a pickle file
281+
# Save the dictionary to a pickle file
247282
with open(pickle_file_path, "wb") as f:
248283
pickle.dump(self.normaliser, f)
249284
print(f"Written normaliser to {pickle_file_path}")
250285

251-
@staticmethod
252-
def apply_normaliser(array: np.ndarray, normaliser_dict: dict) -> np.ndarray:
286+
def apply_normaliser(self, array: np.ndarray, normaliser_dict: dict) -> np.ndarray:
253287
"""Normalises a numpy array based on min and max values created by `set_normaliser`.
254288

255289
Args:
@@ -273,8 +307,7 @@ def apply_normaliser(array: np.ndarray, normaliser_dict: dict) -> np.ndarray:
273307
for i in range(array.shape[0]):
274308
min_val = min_vals[i]
275309
max_val = max_vals[i]
276-
# Apply normalising only to non-zero values (assuming 0 is nodata)
277-
mask = array[i, :, :] != 0
310+
mask = array[i, :, :] != self.nodata_val
278311
clipped = np.clip(array[i, :, :], min_val, max_val)
279312
normalised_array[i, :, :] = np.where(
280313
mask, (clipped - min_val) / (max_val - min_val), 0
@@ -318,6 +351,7 @@ def sample_to_scaler(self, sample_size: int) -> dict:
318351
band_pixel_values = pixel_values[:, band_index]
319352
valid_band_pixel_values = band_pixel_values[
320353
~np.isnan(band_pixel_values)
354+
& (band_pixel_values != self.nodata_val)
321355
]
322356
band_vals = {
323357
"band_name": band_names[band_index],
@@ -328,9 +362,8 @@ def sample_to_scaler(self, sample_size: int) -> dict:
328362

329363
return stats_dict
330364

331-
@staticmethod
332365
def apply_scaler(
333-
array: np.ndarray, scaler_dict: dict[int, dict[str, float]]
366+
self, array: np.ndarray, scaler_dict: dict[int, dict[str, float]]
334367
) -> np.ndarray:
335368
"""Standard scales a numpy array based on mean and std values from a dictionary.
336369

@@ -353,8 +386,7 @@ def apply_scaler(
353386
band_info = scaler_dict.get(i)
354387
mean = band_info["mean"]
355388
std = band_info["std"]
356-
# Apply scaling only to non-zero values (assuming 0 is nodata)
357-
mask = array[i, :, :] != 0
389+
mask = array[i, :, :] != self.nodata_val
358390
scaled_array[i, :, :] = np.where(mask, (array[i, :, :] - mean) / std, 0)
359391
return scaled_array
360392

@@ -400,7 +432,9 @@ def _process_batch(self, batch_vals):
400432
out = {}
401433
with rio.open(self.input_image_path) as src:
402434
for x, y, window in batch:
403-
chip = src.read(window=window, boundless=True, fill_value=0)
435+
chip = src.read(
436+
window=window, boundless=True, fill_value=self.nodata_val
437+
)
404438
if self.standard_scaler:
405439
chip = self.apply_scaler(chip, self.standard_scaler)
406440
if self.normaliser:
@@ -471,16 +505,19 @@ def chip_image(self) -> None:
471505
batches = self._calculate_batches(windows)
472506

473507
if self.use_multiprocessing:
474-
print(f"Processing {len(batches)} batches in parallel.")
475508
num_cores = multiprocessing.cpu_count() - 1 # leave a core free?
476-
print(f"Using {num_cores} cores.")
509+
print(
510+
f"Processing {len(batches)} batches in parallel using {num_cores} cores."
511+
)
477512
with multiprocessing.Pool(processes=num_cores) as pool:
478-
pool.map(self._process_batch, batches)
513+
with tqdm(
514+
total=len(batches), desc="Chipping (parallel)", unit="batch"
515+
) as pbar:
516+
for _ in pool.imap_unordered(self._process_batch, batches):
517+
pbar.update()
479518
else:
480-
print(f"Processing in {len(batches)} batches")
481-
for i, batch in enumerate(batches):
519+
for batch in tqdm(batches, desc="Chipping", unit="batch"):
482520
self._process_batch(batch)
483-
print(f"Processed batch {i + 1} of {len(batches)}.")
484521

485522
elapsed_time = time.time() - start_time
486523
print(f"Chipping completed in {elapsed_time:.2f} seconds.")

src/rschip/remove_background_only.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def remove_background_only_npz(
156156
img_npz_file.unlink()
157157

158158
if out_class_dict:
159-
np.savez(f, **out_class_dict)
160-
np.savez(img_npz_file, **out_img_dict)
159+
np.savez_compressed(f, **out_class_dict)
160+
np.savez_compressed(img_npz_file, **out_img_dict)
161161
else:
162162
print(f"No valid entries left in {f.name}, deleting the NPZ file.")

src/rschip/segmentation_mask.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from pathlib import Path
12
import geopandas as gpd
23
import rasterio as rio
34
import rasterio.features
@@ -10,26 +11,26 @@ class SegmentationMask:
1011
Create segmentation mask from polygon features to raster image extent.
1112

1213
Attributes:
13-
input_image_path (str): Path to the input tif image.
14-
input_features_path (str): Path to the input features (shapefile or GeoPackage).
15-
output_path (str): Path where to create the output mask image.
14+
input_image_path (Path): Path to the input tif image.
15+
input_features_path (Path): Path to the input features (shapefile or GeoPackage).
16+
output_path (Path): Path where to create the output mask image.
1617
class_field (str): Attribute field name in input features that determines the pixel value.
1718
Defaults to 'ml_class'.
1819
"""
1920

2021
def __init__(
2122
self,
22-
input_image_path: str,
23-
input_features_path: str,
24-
output_path: str,
23+
input_image_path: Path,
24+
input_features_path: Path,
25+
output_path: Path,
2526
class_field: str = "ml_class",
2627
) -> None:
2728
"""
2829
Initializes SegmentationMask with input image, features, output path, and class field.
2930
"""
30-
self.input_image_path = input_image_path
31-
self.input_features_path = input_features_path
32-
self.output_path = output_path
31+
self.input_image_path = Path(input_image_path)
32+
self.input_features_path = Path(input_features_path)
33+
self.output_path = Path(output_path)
3334
self.class_field = class_field
3435

3536
def create_mask(self) -> None:

0 commit comments

Comments (0)