Skip to content

Commit 4879287

Browse files
authored
Merge pull request #114 from ICESat2-SlideRule/cmr
refactor: can query for additional granule metadata in CMR search
2 parents 7ebf146 + 9f38a0b commit 4879287

File tree

2 files changed

+80
-48
lines changed

2 files changed

+80
-48
lines changed

examples/cmr_debug_regions.ipynb

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,16 @@
126126
"granule_polygons = []\n",
127127
"for poly in m.regions:\n",
128128
" # polygon from map\n",
129-
" resources,polygons = icesat2.cmr(polygon=poly,\n",
129+
" resources,metadata = icesat2.cmr(polygon=poly,\n",
130130
" short_name=SRwidgets.product.value,\n",
131131
" time_start=SRwidgets.time_start,\n",
132132
" time_end=SRwidgets.time_end,\n",
133133
" version=SRwidgets.release.value,\n",
134-
" return_polygons=True)\n",
135-
" granule_list.extend(resources)\n",
136-
" granule_polygons.extend(polygons)\n",
134+
" return_metadata=True)\n",
135+
" # for each granule resource\n",
136+
" for i,resource in enumerate(resources):\n",
137+
" granule_list.append(resource)\n",
138+
" granule_polygons.append(metadata[i].geometry)\n",
137139
"# print list of granules\n",
138140
"num_granules = len(granule_list)\n",
139141
"logging.info('Number of Granules: {0:d}'.format(num_granules))\n",
@@ -178,16 +180,18 @@
178180
"granule_indices = list(granule_select.index)\n",
179181
"cmap = iter(cm.viridis(np.linspace(0,1,len(granule_indices))))\n",
180182
"for g in granule_indices:\n",
181-
" locations = [(p['lat'],p['lon']) for p in granule_polygons[g]]\n",
182183
" color = colors.to_hex(next(cmap))\n",
183-
" polygon = ipysliderule.ipyleaflet.Polygon(\n",
184-
" locations=locations,\n",
185-
" color=color,\n",
186-
" fill_color=color,\n",
187-
" opacity=0.8,\n",
188-
" weight=1, \n",
184+
" geojson = ipysliderule.ipyleaflet.GeoJSON(\n",
185+
" data=granule_polygons[g].__geo_interface__,\n",
186+
" style=dict(\n",
187+
" color=color,\n",
188+
" fill_color=color,\n",
189+
" opacity=0.8,\n",
190+
" weight=1,\n",
191+
" )\n",
189192
" )\n",
190-
" m.map.add_layer(polygon)"
193+
" m.map.add_layer(geojson)\n",
194+
" "
191195
]
192196
},
193197
{

sliderule/icesat2.py

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -181,25 +181,50 @@ def __cmr_filter_urls(search_results):
181181

182182
return urls
183183

184-
def __cmr_granule_polygons(search_results):
185-
"""Get the polygons for CMR returned granules"""
184+
def __cmr_granule_metadata(search_results):
185+
"""Get the metadata for CMR returned granules"""
186+
# GeoDataFrame with granule metadata
187+
granule_metadata = __emptyframe()
188+
# return empty dataframe if no CMR entries
186189
if 'feed' not in search_results or 'entry' not in search_results['feed']:
187-
return []
188-
granule_polygons = []
190+
return granule_metadata
189191
# for each CMR entry
190192
for e in search_results['feed']['entry']:
191-
# for each polygon
192-
for polys in e['polygons']:
193-
coords = [float(i) for i in polys[0].split()]
194-
region = [{'lon':x,'lat':y} for y,x in zip(coords[::2],coords[1::2])]
195-
granule_polygons.append(region)
196-
# return granule polygons in sliderule region format
197-
return granule_polygons
193+
# columns for dataframe
194+
columns = {}
195+
# time start and time end of granule
196+
columns['time_start'] = numpy.datetime64(e['time_start'])
197+
columns['time_end'] = numpy.datetime64(e['time_end'])
198+
columns['time_updated'] = numpy.datetime64(e['updated'])
199+
# get the granule size and convert to bits
200+
columns['granule_size'] = float(e['granule_size'])*(2.0**20)
201+
# Create Pandas DataFrame object
202+
# use granule id as index
203+
df = geopandas.pd.DataFrame(columns, index=[e['id']])
204+
# Generate Geometry Column
205+
if 'polygons' in e:
206+
coords = [float(i) for i in e['polygons'][0][0].split()]
207+
geometry = Polygon(zip(coords[1::2], coords[::2]))
208+
else:
209+
geometry, = geopandas.points_from_xy([None], [None])
210+
# Build GeoDataFrame (default geometry is crs=EPSG_MERCATOR)
211+
gdf = geopandas.GeoDataFrame(df, geometry=[geometry], crs=EPSG_MERCATOR)
212+
# append to combined GeoDataFrame and catch warnings
213+
with warnings.catch_warnings():
214+
warnings.simplefilter("ignore")
215+
granule_metadata = granule_metadata.append(gdf)
216+
# return granule metadata
217+
# - time start and time end
218+
# - time granule was updated
219+
# - granule size in bits
220+
# - polygons as geodataframe geometry
221+
return granule_metadata
198222

199223
def __cmr_search(short_name, version, time_start, time_end, **kwargs):
200224
"""Perform a scrolling CMR query for files matching input criteria."""
201225
kwargs.setdefault('polygon',None)
202-
kwargs.setdefault('return_polygons',False)
226+
kwargs.setdefault('name_filter',None)
227+
kwargs.setdefault('return_metadata',False)
203228
# build params
204229
params = '&short_name={0}'.format(short_name)
205230
params += __build_version_query_params(version)
@@ -218,7 +243,8 @@ def __cmr_search(short_name, version, time_start, time_end, **kwargs):
218243
ctx.verify_mode = ssl.CERT_NONE
219244

220245
urls = []
221-
polys = []
246+
# GeoDataFrame with granule metadata
247+
metadata = __emptyframe()
222248
while True:
223249
req = urllib.request.Request(cmr_query_url)
224250
if cmr_scroll_id:
@@ -235,14 +261,17 @@ def __cmr_search(short_name, version, time_start, time_end, **kwargs):
235261
if not url_scroll_results:
236262
break
237263
urls += url_scroll_results
238-
# append granule polygons
239-
if kwargs['return_polygons']:
240-
polygon_results = __cmr_granule_polygons(search_page)
264+
# query for granule metadata and polygons
265+
if kwargs['return_metadata']:
266+
metadata_results = __cmr_granule_metadata(search_page)
241267
else:
242-
polygon_results = [None for _ in url_scroll_results]
243-
polys.extend(polygon_results)
268+
metadata_results = [None for _ in url_scroll_results]
269+
# append granule metadata and catch warnings
270+
with warnings.catch_warnings():
271+
warnings.simplefilter("ignore")
272+
metadata = metadata.append(metadata_results)
244273

245-
return (urls,polys)
274+
return (urls,metadata)
246275

247276
###############################################################################
248277
# LOCAL FUNCTIONS
@@ -313,7 +342,7 @@ def __get_values(data, dtype, size):
313342
#
314343
# Query Resources from CMR
315344
#
316-
def __query_resources(parm, version, return_polygons=False):
345+
def __query_resources(parm, version, **kwargs):
317346

318347
# Latch Start Time
319348
tstart = time.perf_counter()
@@ -324,9 +353,8 @@ def __query_resources(parm, version, return_polygons=False):
324353
return []
325354

326355
# Submission Arguments for CMR
327-
kwargs = {}
328356
kwargs['version'] = version
329-
kwargs['return_polygons'] = return_polygons
357+
kwargs.setdefault('return_metadata', False)
330358

331359
# Pull Out Polygon
332360
if "clusters" in parm and parm["clusters"] and len(parm["clusters"]) > 0:
@@ -358,8 +386,8 @@ def __query_resources(parm, version, return_polygons=False):
358386
kwargs['name_filter'] = '*_' + rgt_filter + cycle_filter + region_filter + '_*'
359387

360388
# Make CMR Request
361-
if return_polygons:
362-
resources,polygons = cmr(**kwargs)
389+
if kwargs['return_metadata']:
390+
resources,metadata = cmr(**kwargs)
363391
else:
364392
resources = cmr(**kwargs)
365393

@@ -373,8 +401,8 @@ def __query_resources(parm, version, return_polygons=False):
373401
profiles[__query_resources.__name__] = time.perf_counter() - tstart
374402

375403
# Return Resources
376-
if return_polygons:
377-
return (resources,polygons)
404+
if kwargs['return_metadata']:
405+
return (resources,metadata)
378406
else:
379407
return resources
380408

@@ -564,8 +592,8 @@ def cmr(**kwargs):
564592
# set default version and product short name
565593
kwargs.setdefault('version', DEFAULT_ICESAT2_SDP_VERSION)
566594
kwargs.setdefault('short_name','ATL03')
567-
# return polygons for each requested granule
568-
kwargs.setdefault('return_polygons', False)
595+
# return metadata for each requested granule
596+
kwargs.setdefault('return_metadata', False)
569597
# set default name filter
570598
kwargs.setdefault('name_filter', None)
571599

@@ -583,7 +611,7 @@ def cmr(**kwargs):
583611
# iterate through each polygon (or none if none supplied)
584612
for polygon in polygons:
585613
urls = []
586-
polys = []
614+
metadata = __emptyframe()
587615

588616
# issue CMR request
589617
for tolerance in [0.0001, 0.001, 0.01, 0.1, 1.0, None]:
@@ -600,12 +628,12 @@ def cmr(**kwargs):
600628

601629
# call into NSIDC routines to make CMR request
602630
try:
603-
urls,polys = __cmr_search(kwargs['short_name'],
631+
urls,metadata = __cmr_search(kwargs['short_name'],
604632
kwargs['version'],
605633
kwargs['time_start'],
606634
kwargs['time_end'],
607635
polygon=polystr,
608-
return_polygons=kwargs['return_polygons'],
636+
return_metadata=kwargs['return_metadata'],
609637
name_filter=kwargs['name_filter'])
610638
break # exit loop because cmr search was successful
611639
except urllib.error.HTTPError as e:
@@ -630,15 +658,15 @@ def cmr(**kwargs):
630658
break # exit here because nothing can be done
631659

632660
# populate resources
633-
for url, poly in zip(urls, polys):
634-
resources[url] = poly
661+
for i,url, in enumerate(urls):
662+
resources[url] = metadata.iloc[i]
635663

636664
# build return lists
637665
url_list = list(resources.keys())
638-
poly_list = list(resources.values())
666+
meta_list = list(resources.values())
639667

640-
if kwargs['return_polygons']:
641-
return (url_list,poly_list)
668+
if kwargs['return_metadata']:
669+
return (url_list,meta_list)
642670
else:
643671
return url_list
644672

0 commit comments

Comments
 (0)