Commit 0c4b479 ("Add files via upload", parent 3d25407)
4 files changed: +716, -0 lines

nsidc-download_MODIS.py (347 additions, 0 deletions)

#!/usr/bin/env python
# ----------------------------------------------------------------------------
# NSIDC Data Download Script
#
# Copyright (c) 2020 Regents of the University of Colorado
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# Tested in Python 2.7 and Python 3.4, 3.6, 3.7
#
# To run the script at a Linux, macOS, or Cygwin command-line terminal:
#   $ python nsidc-download_MODIS.py
#
# On Windows, open Start menu -> Run and type cmd. Then type:
#   python nsidc-download_MODIS.py
#
# The script will first search Earthdata for all matching files.
# You will then be prompted for your Earthdata username/password
# and the script will download the matching files.
# If you wish, you may store your Earthdata username/password in a .netrc
# file in your $HOME directory and the script will automatically attempt to
# read this file. The .netrc file should have the following format:
#   machine urs.earthdata.nasa.gov login myusername password mypassword
# where 'myusername' and 'mypassword' are your Earthdata credentials.
#
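# Note: on POSIX systems, Python's netrc module may refuse to read a
# ~/.netrc that stores a password but is readable by other users; if the
# automatic lookup fails with a permissions error, restrict the file first:
#   $ chmod 600 ~/.netrc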
from __future__ import print_function

import base64
import itertools
import json
import netrc
import os
import ssl
import sys
from getpass import getpass

import geopandas as gpd
import numpy as np
from shapely.geometry import MultiPolygon, JOIN_STYLE
from shapely.ops import unary_union

try:
    # Python 3
    from urllib.parse import urlparse
    from urllib.request import urlopen, Request, build_opener, HTTPCookieProcessor
    from urllib.error import HTTPError, URLError
except ImportError:
    # Python 2
    from urlparse import urlparse
    from urllib2 import urlopen, Request, HTTPError, URLError, build_opener, HTTPCookieProcessor

def get_username():
    username = ''

    # For Python 2/3 compatibility:
    try:
        do_input = raw_input  # noqa
    except NameError:
        do_input = input

    while not username:
        try:
            username = do_input('Earthdata username: ')
        except KeyboardInterrupt:
            quit()
    return username


def get_password():
    password = ''
    while not password:
        try:
            password = getpass('password: ')
        except KeyboardInterrupt:
            quit()
    return password

def get_credentials(url):
    """Get user credentials from .netrc or prompt for input."""
    credentials = None
    errprefix = ''
    try:
        info = netrc.netrc()
        username, account, password = info.authenticators(urlparse(URS_URL).hostname)
        errprefix = 'netrc error: '
    except Exception as e:
        if 'No such file' not in str(e):
            print('netrc error: {0}'.format(str(e)))
        username = None
        password = None

    while not credentials:
        if not username:
            username = get_username()
            password = get_password()
        credentials = '{0}:{1}'.format(username, password)
        credentials = base64.b64encode(credentials.encode('ascii')).decode('ascii')

        if url:
            try:
                req = Request(url)
                req.add_header('Authorization', 'Basic {0}'.format(credentials))
                opener = build_opener(HTTPCookieProcessor())
                opener.open(req)
            except HTTPError:
                print(errprefix + 'Incorrect username or password')
                errprefix = ''
                credentials = None
                username = None
                password = None

    return credentials

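# For reference, get_credentials() implements standard HTTP Basic auth: the
# header value is base64('username:password'); e.g. the (hypothetical) pair
# 'user'/'pass' yields 'Authorization: Basic dXNlcjpwYXNz'.
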
def build_version_query_params(version):
    desired_pad_length = 3
    if len(version) > desired_pad_length:
        print('Version string too long: "{0}"'.format(version))
        quit()

    version = str(int(version))  # Strip off any leading zeros
    query_params = ''

    while len(version) <= desired_pad_length:
        padded_version = version.zfill(desired_pad_length)
        query_params += '&version={0}'.format(padded_version)
        desired_pad_length -= 1
    return query_params

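# Worked example: build_version_query_params('6') returns
# '&version=006&version=06&version=6', so the query matches the dataset
# whether CMR stores its version as '006', '06', or '6'.
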

def build_cmr_query_url(short_name, version, time_start, time_end,
                        bounding_box=None, polygon=None,
                        filename_filter=None):
    params = '&short_name={0}'.format(short_name)
    params += build_version_query_params(version)
    params += '&temporal[]={0},{1}'.format(time_start, time_end)
    if polygon:
        params += '&polygon={0}'.format(polygon)
    elif bounding_box:
        params += '&bounding_box={0}'.format(bounding_box)
    if filename_filter:
        option = '&options[producer_granule_id][pattern]=true'
        params += '&producer_granule_id[]={0}{1}'.format(filename_filter, option)
    return CMR_FILE_URL + params


def cmr_download(urls):
    """Download files from list of urls."""
    if not urls:
        return

    url_count = len(urls)
    print('Downloading {0} files...'.format(url_count))
    credentials = None

    for index, url in enumerate(urls, start=1):
        if not credentials and urlparse(url).scheme == 'https':
            credentials = get_credentials(url)

        filename = url.split('/')[-1]
        print('{0}/{1}: {2}'.format(str(index).zfill(len(str(url_count))),
                                    url_count,
                                    filename))

        try:
            # In Python 3 we could eliminate the opener and just do 2 lines:
            #   resp = requests.get(url, auth=(username, password))
            #   open(filename, 'wb').write(resp.content)
            req = Request(url)
            if credentials:
                req.add_header('Authorization', 'Basic {0}'.format(credentials))
            opener = build_opener(HTTPCookieProcessor())
            data = opener.open(req).read()
            open(filename, 'wb').write(data)
        except HTTPError as e:
            print('HTTP error {0}, {1}'.format(e.code, e.reason))
        except URLError as e:
            print('URL error: {0}'.format(e.reason))
        except IOError:
            raise
        except KeyboardInterrupt:
            quit()

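# Note: each granule is written to the current working directory under its
# original filename; a file that already exists there is overwritten.
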
def cmr_filter_urls(search_results):
    """Select only the desired data files from CMR response."""
    if 'feed' not in search_results or 'entry' not in search_results['feed']:
        return []

    entries = [e['links']
               for e in search_results['feed']['entry']
               if 'links' in e]
    # Flatten "entries" to a simple list of links
    links = list(itertools.chain(*entries))

    urls = []
    unique_filenames = set()
    for link in links:
        if 'href' not in link:
            # Exclude links with nothing to download
            continue
        if 'inherited' in link and link['inherited'] is True:
            # Exclude links inherited from the parent collection; they are
            # not specific to this granule
            continue
        if 'rel' in link and 'data#' not in link['rel']:
            # Exclude links which are not classified by CMR as "data" or "metadata"
            continue

        if 'title' in link and 'opendap' in link['title'].lower():
            # Exclude OPeNDAP links--they are responsible for many duplicates
            # This is a hack; when the metadata is updated to properly identify
            # non-datapool links, we should be able to do this in a non-hack way
            continue

        filename = link['href'].split('/')[-1]
        if filename in unique_filenames:
            # Exclude links with duplicate filenames (they would overwrite)
            continue
        unique_filenames.add(filename)

        urls.append(link['href'])

    return urls

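# For reference, a typical link object in the CMR Atom JSON response looks
# roughly like this (field values illustrative):
#   {'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#',
#    'title': 'File to download',
#    'inherited': True,  # usually present only on collection-level links
#    'href': 'https://.../MYD10A1.A2016001.h11v02.006.hdf'}
# The filters above keep only granule-specific data/metadata hrefs with
# unique filenames.
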

def cmr_search(short_name, version, time_start, time_end,
               bounding_box='', polygon='', filename_filter=''):
    """Perform a scrolling CMR query for files matching input criteria."""
    cmr_query_url = build_cmr_query_url(short_name=short_name, version=version,
                                        time_start=time_start, time_end=time_end,
                                        bounding_box=bounding_box,
                                        polygon=polygon, filename_filter=filename_filter)
    print('Querying for data:\n\t{0}\n'.format(cmr_query_url))

    cmr_scroll_id = None
    # Create an SSL context that skips certificate verification for the
    # CMR query
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    try:
        urls = []
        while True:
            req = Request(cmr_query_url)
            if cmr_scroll_id:
                req.add_header('cmr-scroll-id', cmr_scroll_id)
            response = urlopen(req, context=ctx)
            if not cmr_scroll_id:
                # Python 2 and 3 have different case for the http headers
                headers = {k.lower(): v for k, v in dict(response.info()).items()}
                cmr_scroll_id = headers['cmr-scroll-id']
                hits = int(headers['cmr-hits'])
                if hits > 0:
                    print('Found {0} matches.'.format(hits))
                else:
                    print('Found no matches.')
            search_page = response.read()
            search_page = json.loads(search_page.decode('utf-8'))
            url_scroll_results = cmr_filter_urls(search_page)
            if not url_scroll_results:
                break
            if hits > CMR_PAGE_SIZE:
                print('.', end='')
                sys.stdout.flush()
            urls += url_scroll_results

        if hits > CMR_PAGE_SIZE:
            print()
        return urls
    except KeyboardInterrupt:
        quit()

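# Paging note: CMR returns at most CMR_PAGE_SIZE granules per request;
# re-sending the query with the same 'cmr-scroll-id' header fetches the
# next page, so e.g. 4500 hits at a page size of 2000 take three requests.
# The loop exits once a page contributes no further URLs.
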

def polygon_coor(inpolygon):
    """Merge nearby polygons in a shapefile, keep the largest ones, simplify
    them, and return their vertices as a comma-separated lon,lat string for
    the CMR 'polygon' query parameter."""
    eps = 0.75  # buffer width for dilating and eroding
    dist = 0.3  # threshold distance between polygons
    # Read the original shapefile
    df = gpd.read_file(inpolygon)
    # Create a new result GeoDataFrame
    col = ['geometry']
    res = gpd.GeoDataFrame(columns=col)
    df_explode = df.explode()
    dis = []
    for i, j in list(itertools.combinations(df_explode.index, 2)):
        # Distance between polygons i and j in the shapefile
        distance = df_explode.geometry[i].distance(df_explode.geometry[j])
        dis.append(distance)
        if distance < dist:
            # Dilate then erode the pair so that nearby polygons fuse
            e = MultiPolygon([df_explode.geometry[i], df_explode.geometry[j]])
            fx = e.buffer(eps, 1, join_style=JOIN_STYLE.mitre).buffer(-eps, 1, join_style=JOIN_STYLE.mitre)
            res = res.append({'geometry': fx}, ignore_index=True)
    res_explode = res.explode()
    res_explode = gpd.GeoDataFrame({'geometry': unary_union(res_explode["geometry"])})
    res_explode["area"] = res_explode['geometry'].area
    # Keep only polygons above the 95th percentile of area (the major polygons)
    include_area = np.percentile(res_explode["area"].to_numpy(), 95)
    res_explode1 = res_explode[res_explode["area"] > include_area]
    # Simplify the shape
    res_explode1 = res_explode1.simplify(0.05, preserve_topology=True)
    coordinates = []
    # Collect coordinates for the vertices
    for i in res_explode1:
        coordlist = list(zip(i.exterior.coords.xy[0], i.exterior.coords.xy[1]))
        for j in coordlist:
            coordinates.insert(0, round(j[1], 6))
            coordinates.insert(0, round(j[0], 6))
    # Save the resulting shapefile to disk
    res_explode1.crs = df.crs
    res_explode1.to_file(os.path.join(os.path.dirname(inpolygon),
                                      os.path.basename(inpolygon)[:-4] + "_simplified.shp"))
    return ','.join(map(str, coordinates))

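# Illustration of the dilate/erode step above (hypothetical coordinates):
# two unit squares separated by a 0.2-degree gap, smaller than dist, are
# bridged by buffer(+eps) and shrunk back by buffer(-eps):
#   from shapely.geometry import Polygon
#   a = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
#   b = Polygon([(1.2, 0), (2.2, 0), (2.2, 1), (1.2, 1)])
#   m = MultiPolygon([a, b])
#   closed = m.buffer(0.75, 1, join_style=JOIN_STYLE.mitre) \
#             .buffer(-0.75, 1, join_style=JOIN_STYLE.mitre)
#   closed.geom_type  # 'Polygon': the two squares have merged into one
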

if __name__ == '__main__':

    inpolygon = 'F:/modis/crop/Alaska.shp'
    short_name = 'MYD10A1'
    version = '6'
    time_start = '2016-01-01T00:00:00Z'
    time_end = '2016-01-31T23:59:59Z'
    bounding_box = ''
    polygon = polygon_coor(inpolygon)
    filename_filter = ''
    url_list = []

    CMR_URL = 'https://cmr.earthdata.nasa.gov'
    URS_URL = 'https://urs.earthdata.nasa.gov'
    CMR_PAGE_SIZE = 2000
    CMR_FILE_URL = ('{0}/search/granules.json?provider=NSIDC_ECS'
                    '&sort_key[]=start_date&sort_key[]=producer_granule_id'
                    '&scroll=true&page_size={1}'.format(CMR_URL, CMR_PAGE_SIZE))

    url_list = cmr_search(short_name, version, time_start, time_end,
                          bounding_box=bounding_box,
                          polygon=polygon, filename_filter=filename_filter)
    cmr_download(url_list)
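# Usage note: running `python nsidc-download_MODIS.py` with the settings
# above simplifies F:/modis/crop/Alaska.shp (writing Alaska_simplified.shp
# beside it), searches CMR for MYD10A1 version-6 granules intersecting the
# simplified outline during January 2016, and downloads each matching file
# into the current directory, prompting for Earthdata credentials if no
# .netrc entry is found.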
