#!/usr/bin/env python
# ----------------------------------------------------------------------------
# NSIDC Data Download Script
#
# Copyright (c) 2020 Regents of the University of Colorado
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# Tested in Python 2.7 and Python 3.4, 3.6, 3.7
#
# To run the script at a Linux, macOS, or Cygwin command-line terminal:
#   $ python nsidc-data-download.py
#
# On Windows, open Start menu -> Run and type cmd. Then type:
#   python nsidc-data-download.py
#
# The script will first search Earthdata for all matching files.
# You will then be prompted for your Earthdata username/password
# and the script will download the matching files.
# If you wish, you may store your Earthdata username/password in a .netrc
# file in your $HOME directory and the script will automatically attempt to
# read this file. The .netrc file should have the following format:
#   machine urs.earthdata.nasa.gov login myusername password mypassword
# where 'myusername' and 'mypassword' are your Earthdata credentials.
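# For example, on Linux or macOS the file can be created like this (assuming
# a Unix-like shell; the file should be readable only by you):
#   echo 'machine urs.earthdata.nasa.gov login myusername password mypassword' >> ~/.netrc
#   chmod 0600 ~/.netrc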
#
from __future__ import print_function

import base64
import itertools
import json
import netrc
import os
import ssl
import sys
from getpass import getpass

import geopandas as gpd
import numpy as np
from shapely.geometry import MultiPolygon, JOIN_STYLE
from shapely.ops import unary_union

try:
    from urllib.parse import urlparse
    from urllib.request import urlopen, Request, build_opener, HTTPCookieProcessor
    from urllib.error import HTTPError, URLError
except ImportError:
    from urlparse import urlparse
    from urllib2 import urlopen, Request, HTTPError, URLError, build_opener, HTTPCookieProcessor


def get_username():
    username = ''

    # For Python 2/3 compatibility:
    try:
        do_input = raw_input  # noqa
    except NameError:
        do_input = input

    while not username:
        try:
            username = do_input('Earthdata username: ')
        except KeyboardInterrupt:
            quit()
    return username


def get_password():
    password = ''
    while not password:
        try:
            password = getpass('password: ')
        except KeyboardInterrupt:
            quit()
    return password


def get_credentials(url):
    """Get user credentials from .netrc or prompt for input."""
    credentials = None
    errprefix = ''
    try:
        info = netrc.netrc()
        username, account, password = info.authenticators(urlparse(URS_URL).hostname)
        errprefix = 'netrc error: '
    except Exception as e:
        if 'No such file' not in str(e):
            print('netrc error: {0}'.format(str(e)))
        username = None
        password = None

    while not credentials:
        if not username:
            username = get_username()
            password = get_password()
        credentials = '{0}:{1}'.format(username, password)
        credentials = base64.b64encode(credentials.encode('ascii')).decode('ascii')

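        # Validate the base64-encoded Basic-auth credentials against
        # Earthdata Login by attempting to open the protected URL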
        if url:
            try:
                req = Request(url)
                req.add_header('Authorization', 'Basic {0}'.format(credentials))
                opener = build_opener(HTTPCookieProcessor())
                opener.open(req)
            except HTTPError:
                print(errprefix + 'Incorrect username or password')
                errprefix = ''
                credentials = None
                username = None
                password = None

    return credentials


def build_version_query_params(version):
    desired_pad_length = 3
    if len(version) > desired_pad_length:
        print('Version string too long: "{0}"'.format(version))
        quit()

    version = str(int(version))  # Strip off any leading zeros
    query_params = ''

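    # For example, version '6' expands to '&version=006&version=06&version=6',
    # covering the zero-padded forms in which the version may be catalogued.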
    while len(version) <= desired_pad_length:
        padded_version = version.zfill(desired_pad_length)
        query_params += '&version={0}'.format(padded_version)
        desired_pad_length -= 1
    return query_params


def build_cmr_query_url(short_name, version, time_start, time_end,
                        bounding_box=None, polygon=None,
                        filename_filter=None):
    params = '&short_name={0}'.format(short_name)
    params += build_version_query_params(version)
    params += '&temporal[]={0},{1}'.format(time_start, time_end)
    if polygon:
        params += '&polygon={0}'.format(polygon)
    elif bounding_box:
        params += '&bounding_box={0}'.format(bounding_box)
    if filename_filter:
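        # pattern=true lets the filename filter contain CMR wildcards (e.g. '*')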
        option = '&options[producer_granule_id][pattern]=true'
        params += '&producer_granule_id[]={0}{1}'.format(filename_filter, option)
    return CMR_FILE_URL + params


def cmr_download(urls):
    """Download files from list of urls."""
    if not urls:
        return

    url_count = len(urls)
    print('Downloading {0} files...'.format(url_count))
    credentials = None

    for index, url in enumerate(urls, start=1):
        if not credentials and urlparse(url).scheme == 'https':
            credentials = get_credentials(url)

        filename = url.split('/')[-1]
        print('{0}/{1}: {2}'.format(str(index).zfill(len(str(url_count))),
                                    url_count,
                                    filename))

        try:
            # In Python 3 we could eliminate the opener and just do 2 lines:
            #   resp = requests.get(url, auth=(username, password))
            #   open(filename, 'wb').write(resp.content)
            req = Request(url)
            if credentials:
                req.add_header('Authorization', 'Basic {0}'.format(credentials))
            opener = build_opener(HTTPCookieProcessor())
            data = opener.open(req).read()
            open(filename, 'wb').write(data)
        except HTTPError as e:
            print('HTTP error {0}, {1}'.format(e.code, e.reason))
        except URLError as e:
            print('URL error: {0}'.format(e.reason))
        except IOError:
            raise
        except KeyboardInterrupt:
            quit()


def cmr_filter_urls(search_results):
    """Select only the desired data files from CMR response."""
    if 'feed' not in search_results or 'entry' not in search_results['feed']:
        return []

    entries = [e['links']
               for e in search_results['feed']['entry']
               if 'links' in e]
    # Flatten "entries" to a simple list of links
    links = list(itertools.chain(*entries))

    urls = []
    unique_filenames = set()
    for link in links:
        if 'href' not in link:
            # Exclude links with nothing to download
            continue
        if 'inherited' in link and link['inherited'] is True:
            # Exclude links inherited from the parent collection; they do not
            # point to this granule's own data file
            continue
        if 'rel' in link and 'data#' not in link['rel']:
            # Exclude links which are not classified by CMR as "data" or "metadata"
            continue

        if 'title' in link and 'opendap' in link['title'].lower():
            # Exclude OPeNDAP links--they are responsible for many duplicates
            # This is a hack; when the metadata is updated to properly identify
            # non-datapool links, we should be able to do this in a non-hack way
            continue

        filename = link['href'].split('/')[-1]
        if filename in unique_filenames:
            # Exclude links with duplicate filenames (they would overwrite)
            continue
        unique_filenames.add(filename)

        urls.append(link['href'])

    return urls


def cmr_search(short_name, version, time_start, time_end,
               bounding_box='', polygon='', filename_filter=''):
    """Perform a scrolling CMR query for files matching input criteria."""
    cmr_query_url = build_cmr_query_url(short_name=short_name, version=version,
                                        time_start=time_start, time_end=time_end,
                                        bounding_box=bounding_box,
                                        polygon=polygon, filename_filter=filename_filter)
    print('Querying for data:\n\t{0}\n'.format(cmr_query_url))

    cmr_scroll_id = None
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
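    # NOTE: the two lines above disable SSL certificate verification for the
    # CMR query; remove them if strict verification is preferred.
    # CMR uses a "scrolling" search: the first response carries a
    # cmr-scroll-id header and the total hit count, and passing that id back
    # on subsequent requests pages through all matching granules.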

    try:
        urls = []
        while True:
            req = Request(cmr_query_url)
            if cmr_scroll_id:
                req.add_header('cmr-scroll-id', cmr_scroll_id)
            response = urlopen(req, context=ctx)
            if not cmr_scroll_id:
                # Python 2 and 3 have different case for the http headers
                headers = {k.lower(): v for k, v in dict(response.info()).items()}
                cmr_scroll_id = headers['cmr-scroll-id']
                hits = int(headers['cmr-hits'])
                if hits > 0:
                    print('Found {0} matches.'.format(hits))
                else:
                    print('Found no matches.')
            search_page = response.read()
            search_page = json.loads(search_page.decode('utf-8'))
            url_scroll_results = cmr_filter_urls(search_page)
            if not url_scroll_results:
                break
            if hits > CMR_PAGE_SIZE:
                print('.', end='')
                sys.stdout.flush()
            urls += url_scroll_results

        if hits > CMR_PAGE_SIZE:
            print()
        return urls
    except KeyboardInterrupt:
        quit()


def polygon_coor(inpolygon):
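    """Merge nearby polygons from a shapefile into a CMR polygon string.

    Returns a comma-separated string of 'lon,lat' vertex coordinates suitable
    for the CMR '&polygon=' query parameter, and writes a simplified copy of
    the input shapefile alongside the original file.
    """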
    eps = 0.75   # buffer width used to dilate and then erode (closes gaps between nearby polygons)
    dist = 0.3   # distance threshold below which two polygons are merged
    # Read the original shapefile
    df = gpd.read_file(inpolygon)
    # Create a new GeoDataFrame for the merged result
    col = ['geometry']
    res = gpd.GeoDataFrame(columns=col)
    df_explode = df.explode()
    dis = []
    for i, j in list(itertools.combinations(df_explode.index, 2)):
        # Distance between polygons i and j in the shapefile
        distance = df_explode.geometry[i].distance(df_explode.geometry[j])
        dis.append(distance)
        if distance < dist:
            # Dilate then erode the pair (morphological closing) so that
            # polygons closer than `dist` are merged into one shape
            e = MultiPolygon([df_explode.geometry[i], df_explode.geometry[j]])
            fx = e.buffer(eps, 1, join_style=JOIN_STYLE.mitre).buffer(-eps, 1, join_style=JOIN_STYLE.mitre)
            res = res.append({'geometry': fx}, ignore_index=True)
    res_explode = res.explode()
    res_explode = gpd.GeoDataFrame({'geometry': unary_union(res_explode["geometry"])})
    res_explode["area"] = res_explode['geometry'].area
    # Keep only polygons above the 95th percentile of area (the major polygons)
    include_area = np.percentile(res_explode["area"].to_numpy(), 95)
    res_explode1 = res_explode[res_explode["area"] > include_area]
    # Simplify the shape to reduce the number of vertices
    res_explode1 = res_explode1.simplify(0.05, preserve_topology=True)
    coordinates = []
    # Collect the vertex coordinates as a flat lon,lat list; inserting at
    # index 0 reverses the vertex order for the CMR polygon parameter
    for i in res_explode1:
        coordlist = list(zip(i.exterior.coords.xy[0], i.exterior.coords.xy[1]))
        for j in coordlist:
            coordinates.insert(0, round(j[1], 6))
            coordinates.insert(0, round(j[0], 6))
    # Save the simplified shapefile to disk
    res_explode1.crs = df.crs
    res_explode1.to_file(os.path.join(os.path.dirname(inpolygon),
                                      os.path.basename(inpolygon)[:-4] + "_simplified.shp"))
    return ','.join(map(str, coordinates))


if __name__ == '__main__':

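    # Input shapefile and CMR search parameters; adjust these for your own
    # area of interest, dataset, and date range.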
    inpolygon = 'F:/modis/crop/Alaska.shp'
    short_name = 'MYD10A1'
    version = '6'
    time_start = '2016-01-01T00:00:00Z'
    time_end = '2016-01-31T23:59:59Z'
    bounding_box = ''
    polygon = polygon_coor(inpolygon)
    filename_filter = ''
    url_list = []

    CMR_URL = 'https://cmr.earthdata.nasa.gov'
    URS_URL = 'https://urs.earthdata.nasa.gov'
    CMR_PAGE_SIZE = 2000
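    # CMR granule search endpoint; scroll=true enables paging through result
    # sets larger than CMR_PAGE_SIZE granules.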
    CMR_FILE_URL = ('{0}/search/granules.json?provider=NSIDC_ECS'
                    '&sort_key[]=start_date&sort_key[]=producer_granule_id'
                    '&scroll=true&page_size={1}'.format(CMR_URL, CMR_PAGE_SIZE))

    url_list = cmr_search(short_name, version, time_start, time_end,
                          bounding_box=bounding_box,
                          polygon=polygon, filename_filter=filename_filter)
    cmr_download(url_list)