44
55import argparse
66import os
7- import backoff
7+ import re
8+ import warnings
89
10+ import backoff
911import numpy as np
1012import shapely .ops
1113import shapely .wkt
2527
2628def cmdLineParse ():
2729 """
28- Command line parser
30+ Command line parser
2931 """
3032 parser = argparse .ArgumentParser (description = """
3133 Stage and verify WATERMASK for processing. """ ,
@@ -42,7 +44,7 @@ def cmdLineParse():
4244 parser .add_argument ('-m' , '--margin' , type = int , action = 'store' ,
4345 default = 5 , help = 'Margin for water mask bounding box (km)' )
4446 parser .add_argument ('-v' , '--version' , type = str , action = 'store' ,
45- dest = 'version' , default = '0.3 ' ,
47+ dest = 'version' , default = '0.5 ' ,
4648 help = 'Version for water mask' )
4749 parser .add_argument ('-b' , '--bbox' , type = float , action = 'store' ,
4850 dest = 'bbox' , default = None , nargs = '+' ,
@@ -68,7 +70,7 @@ def check_dateline(poly):
6870
6971 xmin , _ , xmax , _ = poly .bounds
7072 # Check dateline crossing
71- if (xmax - xmin ) > 180.0 :
73+ if (( xmax - xmin > 180.0 ) or ( xmin <= 180.0 <= xmax ) or ( xmin <= - 180.0 <= xmax )) :
7274 dateline = shapely .wkt .loads ('LINESTRING( 180.0 -90.0, 180.0 90.0)' )
7375
7476 # build new polygon with all longitudes between 0 and 360
@@ -84,11 +86,11 @@ def check_dateline(poly):
8486
8587 polys = list (decomp )
8688
87- # The Copernicus DEM used for NISAR processing has a longitude
89+ # The water mask used for NISAR processing has a longitude
8890 # range [-180, +180]. The current version of gdal.Translate
8991 # does not allow to perform dateline wrapping. Therefore, coordinates
90- # above 180 need to be wrapped down to -180 to match the Copernicus
91- # DEM longitude range
92+ # above 180 need to be wrapped down to -180 to match the water mask
93+ # longitude range
9294 for polygon_count in range (2 ):
9395 x , y = polys [polygon_count ].exterior .coords .xy
9496 if not any ([k > 180 for k in x ]): # pylint: disable=use-a-generator
@@ -98,7 +100,7 @@ def check_dateline(poly):
98100 x_wrapped_minus_360 = np .asarray (x ) - 360
99101 polys [polygon_count ] = Polygon (zip (x_wrapped_minus_360 , y ))
100102
101- assert len (polys ) == 2
103+ assert ( len (polys ) == 2 )
102104 else :
103105 # If dateline is not crossed, treat input poly as list
104106 polys = [poly ]
@@ -110,15 +112,15 @@ def determine_polygon(ref_slc, bbox=None):
110112 """Determine bounding polygon using RSLC radar grid/orbit
111113 or user-defined bounding box
112114
113- Parameters:
115+ Parameters
114116 ----------
115117 ref_slc: str
116118 Filepath to reference RSLC product
117119 bbox: list, float
118120 Bounding box with lat/lon coordinates (decimal degrees)
119121 in the form of [West, South, East, North]
120122
121- Returns:
123+ Returns
122124 -------
123125 poly: shapely.Geometry.Polygon
124126 Bounding polygon corresponding to RSLC perimeter
@@ -137,14 +139,14 @@ def determine_polygon(ref_slc, bbox=None):
137139def point2epsg (lon , lat ):
138140 """Return EPSG code based on point lat/lon
139141
140- Parameters:
142+ Parameters
141143 ----------
142144 lat: float
143145 Latitude coordinate of the point
144146 lon: float
145147 Longitude coordinate of the point
146148
147- Returns:
149+ Returns
148150 -------
149151 epsg code corresponding to the point lat/lon coordinates
150152 """
@@ -167,8 +169,8 @@ def get_geo_polygon(ref_slc, min_height=-500.,
167169 max_height = 9000. , pts_per_edge = 5 ):
168170 """Create polygon (EPSG:4326) using RSLC radar grid and orbits
169171
170- Parameters:
171- -----------
172+ Parameters
173+ ----------
172174 ref_slc: str
173175 Path to RSLC product to stage the water mask for
174176 min_height: float
@@ -178,7 +180,7 @@ def get_geo_polygon(ref_slc, min_height=-500.,
178180 pts_per_edge: float
179181 Number of points per edge for min/max bounding box computation
180182
181- Returns:
183+ Returns
182184 -------
183185 poly: shapely.Geometry.Polygon
184186 Bounding polygon corresponding to RSLC perimeter on the ground
@@ -220,13 +222,13 @@ def determine_projection(polys):
220222 EPSG is computed for a regular list of points. EPSG
221223 is assigned based on a majority criteria.
222224
223- Parameters:
224- -----------
225+ Parameters
226+ ----------
225227 polys: shapely.Geometry.Polygon
226228 List of shapely Polygons
227229
228- Returns:
229- --------
230+ Returns
231+ -------
230232 epsg:
231233 List of EPSG codes corresponding to elements in polys
232234 """
@@ -268,7 +270,7 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
268270 throttling (see "Query throttling" section at
269271 https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html).
270272
271- Parameters:
273+ Parameters
272274 ----------
273275 vrt_filename: str
274276 Path to the input VRT file
@@ -289,6 +291,20 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
289291 input_x_min , xres , _ , input_y_max , _ , yres = ds .GetGeoTransform ()
290292 length = ds .GetRasterBand (1 ).YSize
291293 width = ds .GetRasterBand (1 ).XSize
294+
295+ # Declare function to snap min/max X and Y
296+ # coordinates over the water grid
297+ def snap_coord (val , snap , offset , round_func ):
298+ return round_func ((val - offset ) / snap ) * snap + offset
299+
300+ # Snap edge coordinates using the raster pixel spacing
301+ # and starting coordinates. Max values are rounded
302+ # using np.ceil and min values are rounded with np.floor
303+ x_min = snap_coord (x_min , xres , input_x_min , np .floor )
304+ x_max = snap_coord (x_max , xres , input_x_min , np .ceil )
305+ y_min = snap_coord (y_min , yres , input_y_max , np .floor )
306+ y_max = snap_coord (y_max , yres , input_y_max , np .ceil )
307+
292308 input_y_min = input_y_max + (length * yres )
293309 input_x_max = input_x_min + (width * xres )
294310
@@ -305,9 +321,9 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
305321 # bbox crosses the anti-meridian, the script divides it in two
306322 # bboxes neighboring the anti-meridian. Here, x_min and x_max
307323 # represent the min and max longitude coordinates of one of these
308- # bboxes. We Add 360 deg if the min longitude of the downloaded DEM
324+ # bboxes. We Add 360 deg if the min longitude of the downloaded water mask
309325 # tile is < 180 deg i.e., there is a dateline crossing.
310- # This ensure that the mosaicked DEM VRT will span a min
326+ # This ensure that the mosaicked water mask VRT will span a min
311327 # range of longitudes rather than the full [-180, 180] deg
312328 sr = osr .SpatialReference (ds .GetProjection ())
313329 epsg_str = sr .GetAttrValue ("AUTHORITY" , 1 )
@@ -324,13 +340,13 @@ def translate_watermask(vrt_filename, outpath, x_min, x_max, y_min, y_max):
324340def download_watermask (polys , epsgs , outfile , version ):
325341 """Download water mask from nisar-WATERMASK bucket
326342
327- Parameters:
343+ Parameters
328344 ----------
329345 polys: shapely.geometry.Polygon
330346 List of shapely polygons
331347 epsg: str, list
332348 List of EPSG codes corresponding to polys
333- outfile:
349+ outfile: str
334350 Path to the output WATERMASK file to be staged
335351 version: str
336352 Water mask version
@@ -367,18 +383,101 @@ def download_watermask(polys, epsgs, outfile, version):
367383 translate_watermask (vrt_filename , outpath , xmin , xmax , ymin , ymax )
368384
369385 # Build vrt with downloaded watermasks
370- gdal .BuildVRT (outfile , watermask_list )
371- except Exception :
386+ vrt_dataset = gdal .BuildVRT (outfile , watermask_list )
387+
388+ # Get the WATER description from the LICENSE.txt file using GDAL
389+ in_license_path = vrt_filename .replace (f'EPSG{ epsg } .vrt' ,
390+ f'EPSG{ epsg } /LICENSE.txt' )
391+ license_exists = gdal .VSIStatL (in_license_path ) is not None
392+
393+ if license_exists :
394+ try :
395+ water_descr = parse_shortdesc_and_notes (in_license_path )
396+ vrt_dataset .SetMetadataItem ("water_mask_description" ,
397+ water_descr )
398+
399+ except Exception as e :
400+ warnings .warn (
401+ f"Found { in_license_path } but could not parse it ({ e } ). "
402+ "Proceeding without metadata."
403+ )
404+ else :
405+ warnings .warn (
406+ f"No LICENSE.txt found at { in_license_path } "
407+ f"(expected for WATERMASK v0.4+). Proceeding without metadata."
408+ )
409+ except Exception as e :
372410 errmsg = f'Failed to download NISAR WATERMASK { version } from s3 bucket. ' \
373411 f'Maybe { version } is not currently supported.'
374- raise ValueError (errmsg )
412+ raise ValueError (errmsg ) from e
413+
414+
415+ def parse_shortdesc_and_notes (license_path : str ,
416+ encoding : str = "utf-8" ) -> str | None :
417+ """
418+ Extract the multi-line text of the “Notes” bullet from
419+ a LICENSE.txt in an S3 bucket via GDAL and return the
420+ value without the leading "- Notes:" header.
421+
422+ Parameters
423+ ----------
424+ license_path : str
425+ `/vsis3/.../LICENSE.txt` or any other GDAL compatible URI.
426+ encoding : str, default "utf-8"
427+ Text encoding to decode the bytes.
428+
429+ Returns
430+ -------
431+ str | None
432+ The extracted Notes text (possibly multi-line) with surrounding
433+ whitespace stripped, or None if no “Notes” bullet is found.
434+ """
435+ stat = gdal .VSIStatL (license_path )
436+ if stat is None :
437+ raise ValueError (f"No LICENSE.txt found at { license_path } " )
438+
439+ fp = gdal .VSIFOpenL (license_path , "rb" )
440+ if fp is None :
441+ raise IOError (f"Could not open { license_path } via GDAL" )
442+ try :
443+ data = gdal .VSIFReadL (1 , stat .size , fp )
444+ finally :
445+ gdal .VSIFCloseL (fp )
446+
447+ if not data :
448+ raise IOError (f"Failed to read any data from { license_path } " )
449+
450+ text = data .decode (encoding , errors = "replace" )
451+
452+ if "notes:" not in text .lower ():
453+ warnings .warn (f'No "Notes" field found in { license_path } ' )
454+ return None
455+
456+ NEXT_BULLET = r"(?=^\s*-\s+[^:\n]+:\s*|\Z)"
457+
458+ def _grab (header : str ) -> str | None :
459+ pat = re .compile (
460+ rf"^\s*-\s*{ re .escape (header )} \s*:\s*(.*?){ NEXT_BULLET } " ,
461+ flags = re .IGNORECASE | re .MULTILINE | re .DOTALL ,
462+ )
463+ m = pat .search (text )
464+ return m .group (1 ).strip () if (m is not None ) else None
465+
466+ shortdesc = _grab ("Short description" )
467+ notes = _grab ("Notes" )
468+
469+ if not (shortdesc and notes ):
470+ raise ValueError (
471+ f'Missing "Short description" or "Notes" in { license_path } ' )
472+
473+ return ' ' .join ([shortdesc , notes ])
375474
376475
377476def transform_polygon_coords (polys , epsgs ):
378477 """Transform coordinates of polys (list of polygons)
379478 to target epsgs (list of EPSG codes)
380479
381- Parameters:
480+ Parameters
382481 ----------
383482 polys: shapely.Geometry.Polygon
384483 List of shapely polygons
@@ -421,14 +520,14 @@ def check_watermask_overlap(watermaskFilepath, polys):
421520 and WATERMASK that stage_watermask.py would download
422521 based on RSLC or bbox provided information
423522
424- Parameters:
523+ Parameters
425524 ----------
426525 watermaskFilepath: str
427526 Filepath to the user-provided WATERMASK
428527 polys: shapely.geometry.Polygon
429528 List of polygons computed from RSLC or bbox
430529
431- Returns:
530+ Returns
432531 -------
433532 perc_area: float
434533 Area (in percentage) covered by the intersection between the
@@ -455,7 +554,7 @@ def check_watermask_overlap(watermaskFilepath, polys):
455554 return perc_area
456555
457556
458- def check_aws_connection ():
557+ def check_aws_connection (version ):
459558 """Check connection to AWS s3://nisar-static-repo/WATER_MASK bucket
460559 Throw exception if no connection is established
461560
@@ -467,7 +566,7 @@ def check_aws_connection():
467566 import boto3
468567 s3 = boto3 .resource ('s3' )
469568 # Check only AWS connection using currently available vrt file.
470- obj = s3 .Object (f'{ STATIC_REPO } ' , 'WATER_MASK/v0.3 /EPSG4326.vrt' )
569+ obj = s3 .Object (f'{ STATIC_REPO } ' , f 'WATER_MASK/v { version } /EPSG4326.vrt' )
471570 try :
472571 obj .get ()['Body' ].read ()
473572 except Exception :
@@ -485,7 +584,7 @@ def apply_margin_polygon(polygon, margin_in_km=5):
485584 ----------
486585 polygon: shapely.Geometry.Polygon
487586 Bounding polygon covering the area on the
488- ground over which download the DEM
587+ ground over which download the water mask
489588 margin_in_km: np.float
490589 Buffer in km to add to polygon
491590
@@ -501,6 +600,9 @@ def apply_margin_polygon(polygon, margin_in_km=5):
501600 lat_margin = margin_km_to_deg (margin_in_km )
502601 lon_margin = margin_km_to_longitude_deg (margin_in_km , lat = lat_worst_case )
503602
603+ if lon_max - lon_min > 180 :
604+ lon_min , lon_max = lon_max , lon_min
605+
504606 poly_with_margin = box (lon_min - lon_margin , max ([lat_min - lat_margin , - 90 ]),
505607 lon_max + lon_margin , min ([lat_max + lat_margin , 90 ]))
506608 return poly_with_margin
@@ -552,7 +654,7 @@ def margin_km_to_longitude_deg(margin_in_km, lat=0):
552654def main (opts ):
553655 """Main script to execute water mask staging
554656
555- Parameters:
657+ Parameters
556658 ----------
557659 opts : argparse.ArgumentParser
558660 Argument parser
@@ -585,9 +687,9 @@ def main(opts):
585687 print ('Insufficient water mask coverage. Errors might occur' )
586688 print (f'water mask coverage is { overlap } %' )
587689 else :
588- # Check connection to AWS s3 nisar-WATERMASK ucket
690+ # Check connection to AWS s3 nisar-WATERMASK bucket
589691 try :
590- check_aws_connection ()
692+ check_aws_connection (opts . version )
591693 except ImportError :
592694 import warnings
593695 warnings .warn ('boto3 is required to verify AWS connection '
0 commit comments