Skip to content

Commit 7bb4b5c

Browse files
oberonia78Jungkyo Jung
andauthored
Add license to staged WaterMask VRT (isce-framework#77)
* read README and add license to vrt * remove unused function * clean stage_watermask * extract only note from LICENSE instead of reading all contents * add short description * fix one line --------- Co-authored-by: Jungkyo Jung <jungkyoj@nisar-adt-dev-5.jpl.nasa.gov>
1 parent 0b54d81 commit 7bb4b5c

File tree

1 file changed

+138
-36
lines changed

1 file changed

+138
-36
lines changed

python/packages/nisar/workflows/stage_watermask.py

Lines changed: 138 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44

55
import argparse
66
import os
7-
import backoff
7+
import re
8+
import warnings
89

10+
import backoff
911
import numpy as np
1012
import shapely.ops
1113
import shapely.wkt
@@ -25,7 +27,7 @@
2527

2628
def cmdLineParse():
2729
"""
28-
Command line parser
30+
Command line parser
2931
"""
3032
parser = argparse.ArgumentParser(description="""
3133
Stage and verify WATERMASK for processing. """,
@@ -42,7 +44,7 @@ def cmdLineParse():
4244
parser.add_argument('-m', '--margin', type=int, action='store',
4345
default=5, help='Margin for water mask bounding box (km)')
4446
parser.add_argument('-v', '--version', type=str, action='store',
45-
dest='version', default='0.3',
47+
dest='version', default='0.5',
4648
help='Version for water mask')
4749
parser.add_argument('-b', '--bbox', type=float, action='store',
4850
dest='bbox', default=None, nargs='+',
@@ -68,7 +70,7 @@ def check_dateline(poly):
6870

6971
xmin, _, xmax, _ = poly.bounds
7072
# Check dateline crossing
71-
if (xmax - xmin) > 180.0:
73+
if ((xmax - xmin > 180.0) or (xmin <= 180.0 <= xmax) or (xmin <= -180.0 <= xmax)):
7274
dateline = shapely.wkt.loads('LINESTRING( 180.0 -90.0, 180.0 90.0)')
7375

7476
# build new polygon with all longitudes between 0 and 360
@@ -84,11 +86,11 @@ def check_dateline(poly):
8486

8587
polys = list(decomp)
8688

87-
# The Copernicus DEM used for NISAR processing has a longitude
89+
# The water mask used for NISAR processing has a longitude
8890
# range [-180, +180]. The current version of gdal.Translate
8991
# does not allow to perform dateline wrapping. Therefore, coordinates
90-
# above 180 need to be wrapped down to -180 to match the Copernicus
91-
# DEM longitude range
92+
# above 180 need to be wrapped down to -180 to match the water mask
93+
# longitude range
9294
for polygon_count in range(2):
9395
x, y = polys[polygon_count].exterior.coords.xy
9496
if not any([k > 180 for k in x]): # pylint: disable=use-a-generator
@@ -98,7 +100,7 @@ def check_dateline(poly):
98100
x_wrapped_minus_360 = np.asarray(x) - 360
99101
polys[polygon_count] = Polygon(zip(x_wrapped_minus_360, y))
100102

101-
assert len(polys) == 2
103+
assert (len(polys) == 2)
102104
else:
103105
# If dateline is not crossed, treat input poly as list
104106
polys = [poly]
@@ -110,15 +112,15 @@ def determine_polygon(ref_slc, bbox=None):
110112
"""Determine bounding polygon using RSLC radar grid/orbit
111113
or user-defined bounding box
112114
113-
Parameters:
115+
Parameters
114116
----------
115117
ref_slc: str
116118
Filepath to reference RSLC product
117119
bbox: list, float
118120
Bounding box with lat/lon coordinates (decimal degrees)
119121
in the form of [West, South, East, North]
120122
121-
Returns:
123+
Returns
122124
-------
123125
poly: shapely.Geometry.Polygon
124126
Bounding polygon corresponding to RSLC perimeter
@@ -137,14 +139,14 @@ def determine_polygon(ref_slc, bbox=None):
137139
def point2epsg(lon, lat):
138140
"""Return EPSG code based on point lat/lon
139141
140-
Parameters:
142+
Parameters
141143
----------
142144
lat: float
143145
Latitude coordinate of the point
144146
lon: float
145147
Longitude coordinate of the point
146148
147-
Returns:
149+
Returns
148150
-------
149151
epsg code corresponding to the point lat/lon coordinates
150152
"""
@@ -167,8 +169,8 @@ def get_geo_polygon(ref_slc, min_height=-500.,
167169
max_height=9000., pts_per_edge=5):
168170
"""Create polygon (EPSG:4326) using RSLC radar grid and orbits
169171
170-
Parameters:
171-
-----------
172+
Parameters
173+
----------
172174
ref_slc: str
173175
Path to RSLC product to stage the water mask for
174176
min_height: float
@@ -178,7 +180,7 @@ def get_geo_polygon(ref_slc, min_height=-500.,
178180
pts_per_edge: float
179181
Number of points per edge for min/max bounding box computation
180182
181-
Returns:
183+
Returns
182184
-------
183185
poly: shapely.Geometry.Polygon
184186
Bounding polygon corresponding to RSLC perimeter on the ground
@@ -220,13 +222,13 @@ def determine_projection(polys):
220222
EPSG is computed for a regular list of points. EPSG
221223
is assigned based on a majority criteria.
222224
223-
Parameters:
224-
-----------
225+
Parameters
226+
----------
225227
polys: shapely.Geometry.Polygon
226228
List of shapely Polygons
227229
228-
Returns:
229-
--------
230+
Returns
231+
-------
230232
epsg:
231233
List of EPSG codes corresponding to elements in polys
232234
"""
@@ -268,7 +270,7 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
268270
throttling (see "Query throttling" section at
269271
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html).
270272
271-
Parameters:
273+
Parameters
272274
----------
273275
vrt_filename: str
274276
Path to the input VRT file
@@ -289,6 +291,20 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
289291
input_x_min, xres, _, input_y_max, _, yres = ds.GetGeoTransform()
290292
length = ds.GetRasterBand(1).YSize
291293
width = ds.GetRasterBand(1).XSize
294+
295+
# Declare function to snap min/max X and Y
296+
# coordinates over the water grid
297+
def snap_coord(val, snap, offset, round_func):
298+
return round_func((val - offset) / snap) * snap + offset
299+
300+
# Snap edge coordinates using the raster pixel spacing
301+
# and starting coordinates. Max values are rounded
302+
# using np.ceil and min values are rounded with np.floor
303+
x_min = snap_coord(x_min, xres, input_x_min, np.floor)
304+
x_max = snap_coord(x_max, xres, input_x_min, np.ceil)
305+
y_min = snap_coord(y_min, yres, input_y_max, np.floor)
306+
y_max = snap_coord(y_max, yres, input_y_max, np.ceil)
307+
292308
input_y_min = input_y_max + (length * yres)
293309
input_x_max = input_x_min + (width * xres)
294310

@@ -305,9 +321,9 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
305321
# bbox crosses the anti-meridian, the script divides it in two
306322
# bboxes neighboring the anti-meridian. Here, x_min and x_max
307323
# represent the min and max longitude coordinates of one of these
308-
# bboxes. We Add 360 deg if the min longitude of the downloaded DEM
324+
# bboxes. We Add 360 deg if the min longitude of the downloaded water mask
309325
# tile is < 180 deg i.e., there is a dateline crossing.
310-
# This ensure that the mosaicked DEM VRT will span a min
326+
# This ensure that the mosaicked water mask VRT will span a min
311327
# range of longitudes rather than the full [-180, 180] deg
312328
sr = osr.SpatialReference(ds.GetProjection())
313329
epsg_str = sr.GetAttrValue("AUTHORITY", 1)
@@ -324,13 +340,13 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
324340
def download_watermask(polys, epsgs, outfile, version):
325341
"""Download water mask from nisar-WATERMASK bucket
326342
327-
Parameters:
343+
Parameters
328344
----------
329345
polys: shapely.geometry.Polygon
330346
List of shapely polygons
331347
epsg: str, list
332348
List of EPSG codes corresponding to polys
333-
outfile:
349+
outfile: str
334350
Path to the output WATERMASK file to be staged
335351
version: str
336352
Water mask version
@@ -367,18 +383,101 @@ def download_watermask(polys, epsgs, outfile, version):
367383
translate_watermask(vrt_filename, outpath, xmin, xmax, ymin, ymax)
368384

369385
# Build vrt with downloaded watermasks
370-
gdal.BuildVRT(outfile, watermask_list)
371-
except Exception:
386+
vrt_dataset = gdal.BuildVRT(outfile, watermask_list)
387+
388+
# Get the WATER description from the LICENSE.txt file using GDAL
389+
in_license_path = vrt_filename.replace(f'EPSG{epsg}.vrt',
390+
f'EPSG{epsg}/LICENSE.txt')
391+
license_exists = gdal.VSIStatL(in_license_path) is not None
392+
393+
if license_exists:
394+
try:
395+
water_descr = parse_shortdesc_and_notes(in_license_path)
396+
vrt_dataset.SetMetadataItem("water_mask_description",
397+
water_descr)
398+
399+
except Exception as e:
400+
warnings.warn(
401+
f"Found {in_license_path} but could not parse it ({e}). "
402+
"Proceeding without metadata."
403+
)
404+
else:
405+
warnings.warn(
406+
f"No LICENSE.txt found at {in_license_path} "
407+
f"(expected for WATERMASK v0.4+). Proceeding without metadata."
408+
)
409+
except Exception as e:
372410
errmsg = f'Failed to download NISAR WATERMASK {version} from s3 bucket. ' \
373411
f'Maybe {version} is not currently supported.'
374-
raise ValueError(errmsg)
412+
raise ValueError(errmsg) from e
413+
414+
415+
def parse_shortdesc_and_notes(license_path: str,
416+
encoding: str = "utf-8") -> str | None:
417+
"""
418+
Extract the multi-line text of the “Notes” bullet from
419+
a LICENSE.txt in an S3 bucket via GDAL and return the
420+
value without the leading "- Notes:" header.
421+
422+
Parameters
423+
----------
424+
license_path : str
425+
`/vsis3/.../LICENSE.txt` or any other GDAL compatible URI.
426+
encoding : str, default "utf-8"
427+
Text encoding to decode the bytes.
428+
429+
Returns
430+
-------
431+
str | None
432+
The extracted Notes text (possibly multi-line) with surrounding
433+
whitespace stripped, or None if no “Notes” bullet is found.
434+
"""
435+
stat = gdal.VSIStatL(license_path)
436+
if stat is None:
437+
raise ValueError(f"No LICENSE.txt found at {license_path}")
438+
439+
fp = gdal.VSIFOpenL(license_path, "rb")
440+
if fp is None:
441+
raise IOError(f"Could not open {license_path} via GDAL")
442+
try:
443+
data = gdal.VSIFReadL(1, stat.size, fp)
444+
finally:
445+
gdal.VSIFCloseL(fp)
446+
447+
if not data:
448+
raise IOError(f"Failed to read any data from {license_path}")
449+
450+
text = data.decode(encoding, errors="replace")
451+
452+
if "notes:" not in text.lower():
453+
warnings.warn(f'No "Notes" field found in {license_path}')
454+
return None
455+
456+
NEXT_BULLET = r"(?=^\s*-\s+[^:\n]+:\s*|\Z)"
457+
458+
def _grab(header: str) -> str | None:
459+
pat = re.compile(
460+
rf"^\s*-\s*{re.escape(header)}\s*:\s*(.*?){NEXT_BULLET}",
461+
flags=re.IGNORECASE | re.MULTILINE | re.DOTALL,
462+
)
463+
m = pat.search(text)
464+
return m.group(1).strip() if (m is not None) else None
465+
466+
shortdesc = _grab("Short description")
467+
notes = _grab("Notes")
468+
469+
if not (shortdesc and notes):
470+
raise ValueError(
471+
f'Missing "Short description" or "Notes" in {license_path}')
472+
473+
return ' '.join([shortdesc, notes])
375474

376475

377476
def transform_polygon_coords(polys, epsgs):
378477
"""Transform coordinates of polys (list of polygons)
379478
to target epsgs (list of EPSG codes)
380479
381-
Parameters:
480+
Parameters
382481
----------
383482
polys: shapely.Geometry.Polygon
384483
List of shapely polygons
@@ -421,14 +520,14 @@ def check_watermask_overlap(watermaskFilepath, polys):
421520
and WATERMASK that stage_watermask.py would download
422521
based on RSLC or bbox provided information
423522
424-
Parameters:
523+
Parameters
425524
----------
426525
watermaskFilepath: str
427526
Filepath to the user-provided WATERMASK
428527
polys: shapely.geometry.Polygon
429528
List of polygons computed from RSLC or bbox
430529
431-
Returns:
530+
Returns
432531
-------
433532
perc_area: float
434533
Area (in percentage) covered by the intersection between the
@@ -455,7 +554,7 @@ def check_watermask_overlap(watermaskFilepath, polys):
455554
return perc_area
456555

457556

458-
def check_aws_connection():
557+
def check_aws_connection(version):
459558
"""Check connection to AWS s3://nisar-static-repo/WATER_MASK bucket
460559
Throw exception if no connection is established
461560
@@ -467,7 +566,7 @@ def check_aws_connection():
467566
import boto3
468567
s3 = boto3.resource('s3')
469568
# Check only AWS connection using currently available vrt file.
470-
obj = s3.Object(f'{STATIC_REPO}', 'WATER_MASK/v0.3/EPSG4326.vrt')
569+
obj = s3.Object(f'{STATIC_REPO}', f'WATER_MASK/v{version}/EPSG4326.vrt')
471570
try:
472571
obj.get()['Body'].read()
473572
except Exception:
@@ -485,7 +584,7 @@ def apply_margin_polygon(polygon, margin_in_km=5):
485584
----------
486585
polygon: shapely.Geometry.Polygon
487586
Bounding polygon covering the area on the
488-
ground over which download the DEM
587+
ground over which download the water mask
489588
margin_in_km: np.float
490589
Buffer in km to add to polygon
491590
@@ -501,6 +600,9 @@ def apply_margin_polygon(polygon, margin_in_km=5):
501600
lat_margin = margin_km_to_deg(margin_in_km)
502601
lon_margin = margin_km_to_longitude_deg(margin_in_km, lat=lat_worst_case)
503602

603+
if lon_max - lon_min > 180:
604+
lon_min, lon_max = lon_max, lon_min
605+
504606
poly_with_margin = box(lon_min - lon_margin, max([lat_min - lat_margin, -90]),
505607
lon_max + lon_margin, min([lat_max + lat_margin, 90]))
506608
return poly_with_margin
@@ -552,7 +654,7 @@ def margin_km_to_longitude_deg(margin_in_km, lat=0):
552654
def main(opts):
553655
"""Main script to execute water mask staging
554656
555-
Parameters:
657+
Parameters
556658
----------
557659
opts : argparse.ArgumentParser
558660
Argument parser
@@ -585,9 +687,9 @@ def main(opts):
585687
print('Insufficient water mask coverage. Errors might occur')
586688
print(f'water mask coverage is {overlap} %')
587689
else:
588-
# Check connection to AWS s3 nisar-WATERMASK ucket
690+
# Check connection to AWS s3 nisar-WATERMASK bucket
589691
try:
590-
check_aws_connection()
692+
check_aws_connection(opts.version)
591693
except ImportError:
592694
import warnings
593695
warnings.warn('boto3 is required to verify AWS connection '

0 commit comments

Comments
 (0)