diff --git a/.gitignore b/.gitignore index bdc368c..63de216 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ example_data/ *.h5 *.nc _freeze +.venv/ diff --git a/_quarto.yml b/_quarto.yml index 170c787..90ca188 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -27,8 +27,8 @@ website: href: index.qmd - section: "Getting Started" contents: - - text: "How to use this cookbook" - href: our-cookbook.qmd + - text: "Getting an Earthdata Login" + href: reference-guides/get-edl.qmd - text: "Tools we use" href: tools-we-use.qmd # - text: "Quick Start Guide" @@ -36,51 +36,56 @@ website: - section: "NSIDC Data" # href: reference-guides/data-orient-overview.qmd contents: - - text: "Types of data" + - text: "Data types and formats" href: reference-guides/data-structures.qmd - - text: "Common file formats" - href: reference-guides/nsidc-file-formats.qmd - text: "NASA collections and granules" - text: "NSIDC map projections and grids" href: reference-guides/projections.qmd - - text: "Quirky Datasets" + - text: "Quirky datasets" href: reference-guides/datasets.qmd - section: "Working with Data" # href: reference-guides/working-with-data-overview.qmd contents: - - section: "Finding and Accessing Data" + - section: "Finding / accessing data" contents: - - text: "Getting an Earthdata Login" - # href: reference-guides/get-edl.qmd - text: "Data search and discovery" href: reference-guides/dataset-search-and-discovery.qmd - text: "Data access" - - section: "Wrangling Data" + href: reference-guides/data-access.qmd + - section: "Wrangling data" contents: - text: "Subsetting" - text: "Reformatting" - text: "Reprojecting" - text: "Resampling" - - section: "Visualizing Data" + - section: "Visualizing data" contents: - text: "Plotting data - time series, scatter plots" - text: "Plotting data on a map" href: reference-guides/plotting-data.qmd - - text: "Best Practices" - - section: "How do I..." 
+ - section: "How-To Guides" href: how-to-guides/overview.qmd contents: - section: "Search for data" - section: "Access data" - section: "Reproject data" - - section: "Work with NetCDF files?" + - section: "Work with NetCDF files" contents: - - text: "get the bounding box of a netcdf file" + - text: "Get the bounding box of a NetCDF file" href: how-to-guides/netcdf_cf.qmd - - text: "get the latitude and longitudes for grid cells" + - text: "Get the latitude and longitudes for grid cells" href: how-to-guides/get_latitude_and_longitude.qmd + - section: "Tutorials / Workshops" + href: tutorials/tutorials_overview.qmd + contents: + - text: "Direct cloud access of ICESat-2 data" + href: tutorials/ATL06-direct-access.ipynb + - text: "Download ICESat-2 Sea Ice data" + href: tutorials/working_with_icesat2_sea_ice_data.ipynb - section: "Appendices" contents: + - text: "Contributing" + - text: "Best Practices" - text: "What is an API?" format: diff --git a/how-to-guides/netcdf_cf.qmd b/how-to-guides/netcdf_cf.qmd index 7434072..30d3ce3 100644 --- a/how-to-guides/netcdf_cf.qmd +++ b/how-to-guides/netcdf_cf.qmd @@ -1,5 +1,5 @@ --- -title: "How do I get the bounding box of a NetCDF file in latitude and longitude?" +title: "How to get the bounding box of a NetCDF file in latitude and longitude" author: Andrew P. Barrett date: last-modified --- diff --git a/how-to-guides/overview.qmd b/how-to-guides/overview.qmd index 3f2d64d..c1331e5 100644 --- a/how-to-guides/overview.qmd +++ b/how-to-guides/overview.qmd @@ -1,27 +1,23 @@ --- title: "How-To Guides" -author: Andy P. Barrett --- ## Introduction -This section of the cookbook contains **How To Guides** to help you +This section of the cookbook contains **How-To Guides** to help you solve a particular problem or task. You can think of these How-To -Guides as recipes. For the most part they are short. They are +Guides as programmatic recipes. For the most part they are short. 
They are written with the assumption that you know what you want to do and have -some understanding of programing. You can think of each How-To as a +some understanding of programming. You can think of each How-To as a building block that can be put together with other How-Tos to construct a workflow. -If you are just learning either programming or working in the cloud, -we recommend looking at the other chapters in this cookbook. - How-To Guides are organized into the following sections: -_Maybe add a short description of each section here_ -- How do I search for data; -- How do I accessing data; -- How do I reproject and resample data; -- How do I work with CF-compliant NetCDF files - - [How do I get the bounding box of a NetCDF file in latitude and longitude?](netcdf_cf.qmd) +- How to search for data (via `earthaccess`); +- How to access data (via `earthaccess`); +- How to reproject and resample data; +- How to work with CF-compliant NetCDF files: + - [How to get the bounding box of a NetCDF file in latitude and longitude](netcdf_cf.qmd) + - [How to get latitude and longitude for the grid cells of a NetCDF file](get_latitude_and_longitude.qmd) diff --git a/index.qmd b/index.qmd index b7630d0..d93e607 100644 --- a/index.qmd +++ b/index.qmd @@ -3,14 +3,35 @@ title: "NSIDC Data Cookbook" author: NSIDC --- -## Welcome - -Welcome to the NSIDC Data Cookbook! - > [!WARNING] > This cookbook is under active development. Major changes to the structure of > the book and its content are expected. We are striving to develop content > that is well-tested and peer-reviewed, but nothing contained here should be > expected to work correctly (or at all!) in this early phase. Many sections contain only an outline of the content. -> These sections will have content added at the project develops. +> These sections will have content added as the project develops. + +## Welcome to the NSIDC Data Cookbook! 
+ +This Cookbook for National Snow and Ice Data Center Distributed Active Archive Center ([NSIDC DAAC](https://nsidc.org/data/data-programs/nsidc-daac)) Data is more Julia Child’s “Mastering the Art of French Cooking” than just a collection of recipes (or data tutorials). The aim is to not only provide easy to follow recipes for working with data but also to provide an understanding of the data managed by NSIDC and the tools available to work with the data. It is not an in-depth guide to the many datasets housed by NSIDC but instead is an introduction to the common types of data, file formats and data structures. The hope is that the cookbook will provide a guide and foundation to help you *master the art of working with cryospheric data*. + +## How to use this cookbook + +If you are new to NASA or NSIDC data, begin with the **Getting Started** section. Beyond that, the cookbook is divided into four major sections: **NSIDC Data**, **Working with Data**, **How-To Guides**, and **Tutorials / Workshops**. We also include some **Appendices** that include a best practices guide for Python scripting, some background details on computing, and how to contribute to the cookbook (if you are so inclined!). + +The **NSIDC Data** section is an introduction to the types of data managed by NSIDC, the file formats used to store that data, coordinate reference systems and grids common to NSIDC data, and a guide to NASA terminology for Earth science data. There is also an introduction to some “quirky” datasets that do not fit more common simple data structures or are just a little more difficult to work with. + +The **Working with Data** section is a guide to the tools and applications for accomplishing common steps in scientific workflows: finding and accessing data, wrangling data, and visualizing data. Wrangling data covers subsetting, reformatting, reprojecting and resampling data in preparation for analysis. 
+ +The **How-To Guides** section is a list of recipes to achieve common tasks. It is intended as a list of solutions to common scientific programming tasks. + +Lastly, the **Tutorials / Workshops** section serves to highlight content that was developed for past events or specific use cases. + +## About the NSIDC DAAC + +NSIDC has managed the NASA National Snow and Ice Data Center Distributed Active Archive Center (NSIDC DAAC) since 1993, archiving and distributing cryospheric and related geophysical data from NASA Earth-observing satellite missions, airborne campaigns and field observations. The NSIDC DAAC provides hundreds of free and open-access NASA Earth science data sets, detailed data documentation, data tools, resources and tutorials, as well as robust data user support services. These data can be used to study topics relating to snow cover, sea ice, ice sheets, ice shelves, glaciers, frozen ground, soil moisture, climate interactions, and more! + + + + + diff --git a/reference-guides/data-access.qmd b/reference-guides/data-access.qmd new file mode 100644 index 0000000..da50645 --- /dev/null +++ b/reference-guides/data-access.qmd @@ -0,0 +1,45 @@ +--- +title: "Data access" +--- + +#### **Prerequisite reminder** + +A [NASA Earthdata Login](https://urs.earthdata.nasa.gov/) is required for all data access methods, including download and direct cloud access. + +#### **NASA Earthdata Cloud** + +The NASA Earthdata Cloud is NASA’s cloud-based archive of Earth observation data. It is hosted by Amazon Web Services (AWS) in region us-west-2. Downloading data from the Earthdata Cloud to your local computer or storage system is and will continue to be free for users. You don't need an AWS account to download data. Alternatively, you can work directly with NSIDC DAAC holdings stored in Amazon S3 (the object storage service used by NASA Earthdata Cloud). 
This direct access method allows you to stream data into memory and analyze it "in place", avoiding large transfers to your local computer. + +Options for accessing NASA Earthdata are outlined below: + +#### **Option 1: Browser-Based Download (No Coding Required)** + +If you prefer a graphical interface, use one of these web-based tools: + +[NASA Earthdata Search](https://search.earthdata.nasa.gov) - Explore, filter and customize data before downloading files. Instructions for using Earthdata Search to find and access data can be found in our Earthdata Search [guide](https://nsidc.org/data/user-resources/help-center/search-order-and-customize-nsidc-daac-data-nasa-earthdata-search). + +image + +Data Access Tool - The NSIDC Data Access Tool can be accessed from a data set's landing page. +Example: [ATLAS/ICESat-2 L2A Global Geolocated Photon Data. (ATL03, Version 7) Landing Page](https://nsidc.org/data/atl03/versions/7). In the right-hand menu on a data set landing page, click "Data Access & Tools". You will be directed to the top of a list of tool and service "cards" with links to various data access methods. Click on the card title "Data Access Tool" to be directed to the Data Access Tool interface for that particular data set. The Data Access Tool allows users to filter files within a data set using spatial bounds, temporal ranges, and filename wildcards. + +Help article on using the Data Access Tool: [https://nsidc.org/data/user-resources/help-center/filter-and-order-data-set-web-page-using-data-access-tool](https://nsidc.org/data/user-resources/help-center/filter-and-order-data-set-web-page-using-data-access-tool) + +#### **Option 2: Python (earthaccess Library)** + +The earthaccess Python library provides a streamlined way to search, authenticate, and download NASA Earthdata. It works both locally and in the cloud (e.g., on EC2 or JupyterHub environments). 
+ +earthaccess [documentation](https://earthaccess.readthedocs.io/en/stable/) + +*Have reference/link to one of our "how do I's" here?* + +#### **Option 3: Command Line Tools (```wget```, ```curl```, PODAAC subscriber)** + +If you're comfortable using the terminal, command-line tools allow flexible and efficient downloading. + +```wget``` or ```curl``` – Download known files or batch download from an HTTPS URL list. +Learn to create .txt files of download links here: [Creating Text Files of HTTPS and S3 URLs for Earthdata Cloud Data Access](https://nsidc.org/data/user-resources/help-center/creating-text-files-https-and-s3-urls-earthdata-cloud-data-access) + +[PODAAC Data Subscriber](https://github.com/podaac/data-subscriber/blob/main/README.md) – A Python-based command-line tool that supports spatial and temporal filtering. Though designed for PODAAC, it can be adapted for NSIDC and other DAACs. + +All of these command line options are detailed in this help article: [Downloading Data from Earthdata Cloud to Your Local Computer Using the Command Line](https://nsidc.org/data/user-resources/help-center/downloading-data-earthdata-cloud-your-local-computer-using-command-line) \ No newline at end of file diff --git a/reference-guides/data-structures.qmd b/reference-guides/data-structures.qmd index c91a951..3559fb4 100644 --- a/reference-guides/data-structures.qmd +++ b/reference-guides/data-structures.qmd @@ -1,12 +1,23 @@ --- -title: "Data Structures" +title: "Data types and formats" --- -Describes common remote sensing data structures. +There are many common data types/ structures and terminology to go along with them. Some examples: -_We should add information for tabular data and Data Frames_ +- **Tabular** – rows and columns, often stored in CSV or TSV files. Each row is an observation, and each column is a variable (e.g., time, latitude, longitude, temperature). +- **Data Frames** – tabular data structures used in programming languages like R or Python (pandas). 
Data frames allow for more complex indexing, metadata, and transformations than simple tabular files. +- **Swath** – along-track measurements collected as the satellite passes over an area, usually irregular in shape and resolution. +- **Raster / Grids** – data organized into regular grid cells, each cell representing a spatial unit (e.g., 25 km × 25 km grid of snow cover). +- **Resampling** – methods for transforming data between swath, raster, or other structures (e.g., nearest neighbor, bilinear interpolation). -- Swath -- Along-track -- Raster/Grids -- Resampling \ No newline at end of file + +How to work with file formats commonly found at NSIDC: In most cases, it’s best to avoid low-level libraries such as `netCDF4` or `h5py`. Higher-level libraries provide more intuitive access, automatically handle metadata, and streamline analysis. Some format descriptions and recommendations are in the table below. + +| File Format | Description | Recommended Tools | +| --------------------- | ------------------------------------------------------------------------------------------- | -------------------------------------------------------- | +| **NetCDF4 / NetCDFx** | Multidimensional climate/remote sensing data (time, lat, lon, variables). | `xarray` (`xr.open_dataset`) in Python; `terra` or `ncdf4` in R. | +| **HDF5** | Hierarchical format for storing arrays, tables, and metadata; used widely in NASA products. | `xarray`, `pandas`; avoid `h5py` unless necessary. | +| **HDF-EOS** | Earth Observing System variant of HDF, often with swath, grid, or point structures. | `xarray`, `h5netcdf`, NASA `harmony-py`. | +| **Shapefile** | Vector geospatial data (points, lines, polygons) with CRS support. | `geopandas` (Python); `sf` (R). | +| **GeoTIFF** | Georeferenced raster imagery and gridded data. | `rasterio`, `rioxarray` (Python); `terra`, `raster` (R). | +| **CSV/TSV** | Tabular text-based files, rows = observations, columns = variables. 
| `pandas` (Python); `readr`/`data.table`/`tibble` (R). | diff --git a/reference-guides/dataset-search-and-discovery.qmd b/reference-guides/dataset-search-and-discovery.qmd index f68c605..4a32843 100644 --- a/reference-guides/dataset-search-and-discovery.qmd +++ b/reference-guides/dataset-search-and-discovery.qmd @@ -2,8 +2,45 @@ title: "Dataset Search and Discovery" --- -- Getting an EDL -- A simple earthaccess search and download -- EDS -- curl -- wget \ No newline at end of file +### **NASA Common Metadata Repository** + +There are a variety of search and discovery methods allowing users to find NASA Earthdata from a graphical user interface (GUI) or using programmatic access methods, depending on preference. Any search and discovery method leverages the [NASA Common Metadata Repository](https://www.earthdata.nasa.gov/about/esdis/eosdis/cmr) to find data of interest. + +*NASA's Common Metadata Repository (CMR) is a high-performance, high-quality, continuously evolving metadata system that catalogs all data and service metadata records for NASA's Earth Observing System Data and Information System (EOSDIS) and will be the authoritative management system for all EOSDIS metadata. These metadata records are registered, modified, discovered, and accessed through programmatic interfaces leveraging standard protocols and APIs.* + +*CMR is the keystone that makes NASA's Earth observation data discoverable. As a metadata repository, CMR contains Unified Metadata Model (UMM) schema records that describe individual Earth data files (UMM-Granules), collections of files (UMM-Collections), scientific details about the data files (UMM-Variables), related tools and services that act on the data files (UMM-Tools and -Services), and pertinent relationships between these concepts. 
Using the UMM allows CMR to host its metadata records in several supported native formats, with translation services available between formats.* + +### **Search and Discovery Methods** + + +#### **Graphical User Interfaces** + + +##### **NASA Earthdata Search** + +All of NASA Earthdata are available through [NASA Earthdata Search](https://search.earthdata.nasa.gov), not just data archived through the NSIDC DAAC. [Help article](https://nsidc.org/data/user-resources/help-center/search-order-and-customize-nsidc-daac-data-nasa-earthdata-search) for using Earthdata Search. + +##### **NSIDC Data Access Tool** + +The NSIDC Data Access Tool is accessible from landing pages on the NSIDC website, and is an easy way to filter for files of interest without leaving nsidc.org. [Help article](https://nsidc.org/data/data-access-tool) for the NSIDC Data Access Tool. + +#### **Programmatic** + +The majority of the shorter form "How-To Guides" and longer form "Tutorials / Workshops" will present programmatic data access methods using Python. We hope to expand to other languages, such as R, in the future. + +**earthaccess Python library** + +`earthaccess` is a Python library to **search for** and **download** or **stream** NASA Earth science data with just a few lines of code. + +>*"earthaccess revolutionizes NASA data access by drastically reducing the complexity and code required. Since open science is a collaborative effort involving people from different technical backgrounds, our team took the approach that data analysis can and should be made more inclusive and accessible by reducing the complexities of underlying systems."* +> +>Luis López, an NSIDC software developer and earthaccess creator + +Instructions for searching for data using earthaccess [here](https://earthaccess.readthedocs.io/en/stable/user_guide/search/). 
+ + + + + + + diff --git a/reference-guides/get-edl.qmd b/reference-guides/get-edl.qmd new file mode 100644 index 0000000..6d2b520 --- /dev/null +++ b/reference-guides/get-edl.qmd @@ -0,0 +1,9 @@ +--- +title: "NASA Earthdata Login" +--- + +NASA Earthdata are freely accessible to all users, but a NASA Earthdata Login is required for access. Users can register for an Earthdata Login at [https://urs.earthdata.nasa.gov/](https://urs.earthdata.nasa.gov/). + + +Some programmatic data access methods are simplified by setting up your Earthdata Login credentials in a netrc file for easy authentication. +Instructions for creating a netrc file can be found [here](https://nsidc.org/data/user-resources/help-center/creating-netrc-file-earthdata-login). diff --git a/tools-we-use.qmd b/tools-we-use.qmd index 071b6c4..17cf8ef 100644 --- a/tools-we-use.qmd +++ b/tools-we-use.qmd @@ -3,7 +3,7 @@ title: "Tools we use" #number-sections: true --- -Currently, this cookbook features Python Packages (@sec-pythonpkg) for working with data supported by NSIDC DAAC. It also features some applications (@sec-applications) that are accessible through a web browser or as stand alone packages that need to be installed on your local machine. +Currently, this cookbook features [Python Packages](#sec-pythonpkg) for working with data supported by NSIDC DAAC. It also features some [applications](#sec-applications) that are accessible through a web browser or as stand alone packages that need to be installed on your local machine. The focus on Python not only reflects the expertise of NSIDC DAAC but also reflects the popularity of Python within the Earth and atmospheric science communities. However, we recognize that many of our users are more familiar with other programming langauges such as R and Matlab. We hope that we will be able to include these langauges as the Cookbook develops. 
@@ -14,53 +14,86 @@ Using web or locally-installed applications is a good way to start to discover a There are many Python packages available for working with Earth science data. The packages we use in this Cookbook are an unashamedly opinionated selection; they are the tools we like to use. We also think that these tools are the easiest to use for the types of data mananaged by NSIDC DAAC. Most of the tools have been developed so that researchers do not have to worry about the low-level details of accessing and working with often complicated data used in Earth science. This reduces the amount of code you have to write and also reduces the number of mistakes you will inevitably make. #### earthaccess -[`earthaccess`](https://earthaccess.readthedocs.io/en/latest/) is a package to search for and access NASA Earth science data. + +[`earthaccess`](https://earthaccess.readthedocs.io/en/latest/) — search for and access NASA Earth science data. #### xarray -[`xarray`](https://xarray.dev/) is a package to work with N-dimensional data (e.g `(time,x,y,z)`). + +[`xarray`](https://xarray.dev/) — work with N-dimensional labeled data (e.g. `(time, x, y, z)`). #### rioxarray -[`rioxarray`](https://corteva.github.io/rioxarray/stable/index.html) is an extentsion to `xarray` that makes data "geospatially-aware". + +[`rioxarray`](https://corteva.github.io/rioxarray/stable/index.html) — add geospatial awareness to `xarray`. #### rasterio -[`rasterio`](https://rasterio.readthedocs.io/en/stable/intro.html) is a Python geospatial library for working with raster data. -#### Pandas -[`pandas`](https://pandas.pydata.org/docs/index.html) is a package to work with tabular data (e.g. the kind of data stored in spreadsheets or databases). +[`rasterio`](https://rasterio.readthedocs.io/en/stable/intro.html) — read, write, and process raster datasets. + +#### pandas + +[`pandas`](https://pandas.pydata.org/docs/index.html) — handle tabular data (spreadsheets, databases, CSV). 
+ +#### geopandas -#### Geopandas -[`geopandas`](https://geopandas.org/en/stable/) is an extension to `pandas` to work with geospatial data. +[`geopandas`](https://geopandas.org/en/stable/) — extend `pandas` to work with geospatial vector data. #### cartopy -TBD + +[`cartopy`](https://scitools.org.uk/cartopy/docs/latest/) — map projections, transformations, and plotting for geospatial data. #### SlideRule -TBD + +[`SlideRule`](https://github.com/ICESat2-SlideRule/sliderule-python) — on-demand processing and access to ICESat-2 data. #### icepyx -TBD + +[`icepyx`](https://icepyx.readthedocs.io/en/latest/) — search, subset, and download ICESat-2 data with built-in metadata handling. #### satpy -TBD + +[`satpy`](https://satpy.readthedocs.io/en/stable/) — read, composite, and visualize meteorological satellite data. #### dask -TBD -## Installing Python Packages +[`dask`](https://www.dask.org/) — scale computations from laptops to clusters with parallel, out-of-core workflows. + +### Installing Python Packages + +Most Python packages can be installed from the [Python Package Index (PyPI)](https://pypi.org/) using [pip](https://pip.pypa.io/en/stable/). Some scientific and geospatial packages are easier to install with [conda](https://docs.conda.io/), particularly through the [conda-forge community channel](https://conda-forge.org/). +For more details, see the [official Python documentation on installing packages](https://docs.python.org/3/installing/index.html), the [pip documentation](https://pip.pypa.io/en/stable/getting-started/), and the [conda user guide](https://docs.conda.io/projects/conda/en/latest/user-guide/index.html). ## Applications {#sec-applications} +Applications provide graphical interfaces for exploring, visualizing, and working with Earth science data. Some run directly in a web browser, while others require installation on your local machine. 
They are useful for quickly examining datasets, generating plots or maps, and learning about data structures before moving on to scripted workflows. + #### Earthdata Search +[`Earthdata Search`](https://search.earthdata.nasa.gov/) — NASA’s primary web application for searching, filtering, and downloading Earth science data. + #### Open Altimetry +[`Open Altimetry`](https://openaltimetry.org/) — a browser-based tool for visualizing and accessing ICESat and ICESat-2 altimetry data. + #### HDFView +[`HDFView`](https://portal.hdfgroup.org/display/support/HDFView) — a desktop application for viewing and editing HDF4 and HDF5 files. + #### Panoply +[`Panoply`](https://www.giss.nasa.gov/tools/panoply/) — a NASA tool for visualizing netCDF, HDF, and GRIB files with quick map and plot options. + #### NCView +[`NCView`](http://meteora.ucsd.edu/~pierce/ncview_home_page.html) — a lightweight utility for quickly viewing netCDF files, especially time series of 2D variables. + #### QGIS -#### ArcGIS \ No newline at end of file +[`QGIS`](https://qgis.org/) — an open-source geographic information system for analyzing and visualizing spatial data. + +#### ArcGIS + +[`ArcGIS`](https://www.esri.com/en-us/arcgis/about-arcgis/overview) — a commercial GIS platform for advanced mapping, spatial analysis, and data management. + + + diff --git a/tutorials/ATL06-direct-access.ipynb b/tutorials/ATL06-direct-access.ipynb new file mode 100644 index 0000000..910376f --- /dev/null +++ b/tutorials/ATL06-direct-access.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e0754304-7036-4530-83ec-86cec0f9886b", + "metadata": { + "tags": [] + }, + "source": [ + "\n", + "\n", + "# **Accessing and working with ICESat-2 data in the cloud**\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "## **1. Tutorial Overview**\n", + "\n", + "**Note: This is an updated version of the notebook that was presented to the NSIDC DAAC User Working Group in May 2022**\n", + "\n", + "This notebook demonstrates searching for cloud-hosted ICESat-2 data and directly accessing Land Ice Height (ATL06) granules from an Amazon Compute Cloud (EC2) instance using the `earthaccess` package. NASA data \"in the cloud\" are stored in Amazon Web Services (AWS) Simple Storage Service (S3) Buckets. **Direct Access** is an efficient way to work with data stored in an S3 Bucket when you are working in the cloud. Cloud-hosted granules can be opened and loaded into memory without the need to download them first. This allows you to take advantage of the scalability and power of cloud computing. \n", + "\n", + "The Amazon Global cloud is divided into geographical regions. To have direct access to data stored in a region, our compute instance - a virtual computer that we create to perform processing operations in place of using our own desktop or laptop - must be in the same region as the data. This is a fundamental concept of _analysis in place_. **NASA cloud-hosted data is in Amazon Region us-west-2. So your compute instance must also be in us-west-2.** If we wanted to use data stored in another region, to use direct access for that data, we would start a compute instance in that region.\n", + "\n", + "As an example data collection, we use ICESat-2 Land Ice Height (ATL06) over the Juneau Icefield, AK, for March and April 2020. ICESat-2 data granules, including ATL06, are stored in HDF5 format. We demonstrate how to open an HDF5 granule and access data variables using `xarray`. Land Ice Heights are then plotted using `hvplot`. \n", + "\n", + "`earthaccess` is a package developed by Luis Lopez (NSIDC developer) to allow easy search of the NASA Common Metadata Repository (CMR) and download of NASA data collections.
It can be used for programmatic search and access for both _DAAC-hosted_ and _cloud-hosted_ data. It manages authenticating using Earthdata Login credentials which are then used to obtain the S3 tokens that are needed for S3 direct access. https://github.com/nsidc/earthaccess\n", + "\n", + "\n", + "### **Credits**\n", + "\n", + "The notebook was created by Andy Barrett, NSIDC, updated by Jennifer Roebuck, NSIDC, and is based on notebooks developed by Luis Lopez and Mikala Beig, NSIDC.\n", + "\n", + "For questions regarding the notebook, or to report problems, please create a new issue in the [NSIDC-Data-Tutorials repo](https://github.com/nsidc/NSIDC-Data-Tutorials/issues).\n", + "\n", + "### **Learning Objectives**\n", + "\n", + "By the end of this demonstration you will be able to: \n", + "1. use `earthaccess` to search for ICESat-2 data using spatial and temporal filters and explore search results; \n", + "2. open data granules using direct access to the ICESat-2 S3 bucket; \n", + "3. load a HDF5 group into an `xarray.Dataset`; \n", + "4. visualize the land ice heights using `hvplot`. \n", + "\n", + "### **Prerequisites**\n", + "\n", + "1. An EC2 instance in the us-west-2 region. **NASA cloud-hosted data is in Amazon Region us-west-2. So you also need an EC2 instance in the us-west-2 region.** An EC2 instance is a virtual computer that you create to perform processing operations in place of using your own desktop or laptop. Details on how to set up an instance can be found [here](https://nsidc.org/data/user-resources/help-center/nasa-earthdata-cloud-data-access-guide#anchor-1).\n", + "2. An Earthdata Login is required for data access. If you don't have one, you can register for one [here](https://urs.earthdata.nasa.gov/).\n", + "3. A .netrc file that contains your Earthdata Login credentials, in your home directory.
The current recommended practice for authentication is to create a .netrc file in your home directory following [these instructions](https://nsidc.org/support/how/how-do-i-programmatically-request-data-services) (Step 1) and to use the .netrc file for authentication when required for data access during the tutorial.\n", + "4. The *nsidc-tutorials* environment is set up and activated. This [README](https://github.com/nsidc/NSIDC-Data-Tutorials/blob/main/README.md) has setup instructions.\n", + "\n", + "### **Example of end product** \n", + "At the end of this tutorial, the following figure will be generated:\n", + " \n", + "![ATL06 land ice heights](./img/atl06_example_end_product.png)\n", + "### **Time requirement**\n", + "\n", + "Allow approximately 20 minutes to complete this tutorial." + ] + }, + { + "cell_type": "markdown", + "id": "816f31af", + "metadata": {}, + "source": [ + "## **2. Tutorial steps**\n", + "\n", + "## Import Packages\n", + "\n", + "The first step in any `python` script or notebook is to import packages. This tutorial requires the following packages:\n", + "- `earthaccess`, which enables Earthdata Login authentication and retrieves AWS credentials; enables collection and granule searches; and S3 access;\n", + "- `xarray`, used to load data;\n", + "- `hvplot`, used to visualize land ice height data." + ] + }, + { + "cell_type": "markdown", + "id": "d3b4c9e6", + "metadata": {}, + "source": [ + "We are going to import the whole `earthaccess` package.\n", + "\n", + "We will also import the whole `xarray` package but use a standard short name `xr`, using the `import <package> as <short_name>` syntax. We could use anything for a short name but `xr` is an accepted standard that most `xarray` users are familiar with.\n", + "\n", + "We only need the `xarray` module from `hvplot` so we import that using the `import <package>.<module>` syntax."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b00b7a6e", + "metadata": {}, + "outputs": [], + "source": [ + "# For searching NASA data\n", + "import earthaccess\n", + "\n", + "# For reading data, analysis and plotting\n", + "import xarray as xr\n", + "import hvplot.xarray\n", + "import pprint" + ] + }, + { + "cell_type": "markdown", + "id": "15ae2994", + "metadata": {}, + "source": [ + "## Authenticate\n", + "\n", + "The first step is to get the correct authentication that will allow us to get _cloud-hosted_ ICESat-2 data. This is all done through Earthdata Login. The `login` method also gets the correct AWS credentials.\n", + "\n", + "Login requires your Earthdata Login username and password. The `login` method will automatically search for these credentials as environment variables or in a `.netrc` file, and if those aren't available it will prompt us to enter our username and password. We use a `.netrc` strategy. A `.netrc` file is a text file located in our home directory that contains login information for remote machines. If we don't have a `.netrc` file, `login` can create one for us.\n", + "\n", + "```\n", + "earthaccess.login(strategy='interactive', persist=True)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37d6a667", + "metadata": {}, + "outputs": [], + "source": [ + "auth = earthaccess.login()" + ] + }, + { + "cell_type": "markdown", + "id": "28d7b582", + "metadata": {}, + "source": [ + "## Search for ICESat-2 Collections\n", + "\n", + "`earthaccess` leverages the Common Metadata Repository (CMR) API to search for collections and granules. [Earthdata Search](https://search.earthdata.nasa.gov/search) also uses the CMR API.\n", + "\n", + "We can use the `search_datasets` method to search for ICESat-2 collections by setting `keyword='ICESat-2'`.\n", + "\n", + "This will display the number of data collections (data sets) that meet this search criteria." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e80e935", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "Query = earthaccess.search_datasets(keyword = 'ICESat-2')" + ] + }, + { + "cell_type": "markdown", + "id": "d3957627", + "metadata": {}, + "source": [ + "In this case there are 65 collections that have the keyword ICESat-2.\n", + "\n", + "The `search_datasets` method returns a python list of `DataCollection` objects. We can view the metadata for each collection in long form by passing a `DataCollection` object to print or as a summary using the `summary` method. We can also use the `pprint` function to Pretty Print each object.\n", + "\n", + "We will do this for the first 10 results (objects)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f54b13d9", + "metadata": {}, + "outputs": [], + "source": [ + "for collection in Query[:10]:\n", + " pprint.pprint(collection.summary(), sort_dicts=True, indent=4)\n", + " print('')\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "b88357e5", + "metadata": {}, + "source": [ + "For each collection, `summary` returns a subset of fields from the collection metadata and the Unified Metadata Model (UMM):\n", + "- `concept-id` is a unique id for the collection. It consists of an alphanumeric code and the provider-id specific to the DAAC (Distributed Active Archive Center). You can use the `concept_id` to search for data granules.\n", + "- `short_name` is a quick way of referring to a collection (instead of using the full title). It can be found on the collection landing page underneath the collection title after 'DATA SET ID'. 
See the table below for a list of the shortnames for ICESat-2 collections.\n", + "- `version` is the version of each collection.\n", + "- `file-type` gives information about the file format of the collection granules.\n", + "- `get-data` is a collection of URLs that can be used to access the data, collection landing pages and data tools. \n", + "- `cloud-info` is for cloud-hosted data and provides additional information about the location of the S3 bucket that holds the data and where to get temporary AWS S3 credentials to access the S3 buckets. `earthaccess` handles these credentials and the links to the S3 buckets, so in general you won't need to worry about this information. \n", + "\n", + "For the ICESat-2 search results, within the concept-id, there is a provider-id: either `NSIDC_ECS` or `NSIDC_CPRD`. `NSIDC_ECS` is for the _on-prem_ collections and `NSIDC_CPRD` is for the _cloud-hosted_ collections. \n", + "\n", + "For ICESat-2, `ShortNames` are generally how different products are referred to.\n", + "\n", + "| ShortName | Product Description |\n", + "|:-----------:|:---------------------|\n", + "| ATL03 | ATLAS/ICESat-2 L2A Global Geolocated Photon Data |\n", + "| ATL06 | ATLAS/ICESat-2 L3A Land Ice Height |\n", + "| ATL07 | ATLAS/ICESat-2 L3A Sea Ice Height |\n", + "| ATL08 | ATLAS/ICESat-2 L3A Land and Vegetation Height |\n", + "| ATL09 | ATLAS/ICESat-2 L3A Calibrated Backscatter Profiles and Atmospheric Layer Characteristics |\n", + "| ATL10 | ATLAS/ICESat-2 L3A Sea Ice Freeboard |\n", + "| ATL11 | ATLAS/ICESat-2 L3B Slope-Corrected Land Ice Height Time Series |\n", + "| ATL12 | ATLAS/ICESat-2 L3A Ocean Surface Height |\n", + "| ATL13 | ATLAS/ICESat-2 L3A Along Track Inland Surface Water Data |" + ] + }, + { + "cell_type": "markdown", + "id": "fc62d6f6", + "metadata": {}, + "source": [ + "### Search for cloud-hosted data\n", + "For most collections, to search for only data in the cloud, the `cloud_hosted` parameter can be used.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "322d78c3", + "metadata": {}, + "outputs": [], + "source": [ + "Query = earthaccess.search_datasets(\n", + " keyword = 'ICESat-2',\n", + " cloud_hosted = True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8df10797", + "metadata": {}, + "source": [ + "## Search a data set using spatial and temporal filters \n", + "\n", + "We can use the `search_data` method to search for granules within a data set by location and time using spatial and temporal filters. In this example, we will search for data granules from the ATL06 verison 006 cloud-hosted data set over the Juneau Icefield, AK, for March and April 2020.\n", + "\n", + "The temporal range is identified with standard date strings, and latitude-longitude corners of a bounding box is specified. Polygons and points, as well as shapefiles can also be specified.\n", + "\n", + "This will display the number of granules that match our search. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fba5c34", + "metadata": {}, + "outputs": [], + "source": [ + "results = earthaccess.search_data(\n", + " short_name = 'ATL06',\n", + " version = '006',\n", + " cloud_hosted = True,\n", + " bounding_box = (-134.7,58.9,-133.9,59.2),\n", + " temporal = ('2020-03-01','2020-04-30'),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a7bc1b37", + "metadata": {}, + "source": [ + "To display the rendered metadata, including the download link, granule size and two images, we will use `display`. In the example below, all 4 results are shown. \n", + "\n", + "The download link is `https` and can be used download the granule to your local machine. This is similar to downloading _DAAC-hosted_ data but in this case the data are coming from the Earthdata Cloud. For NASA data in the Earthdata Cloud, there is no charge to the user for egress from AWS Cloud servers. 
This is not the case for other data in the cloud.\n", + "\n", + "Note the `[None, None, None, None]` that is displayed at the end can be ignored; it has no meaning in relation to the metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a04370d3", + "metadata": {}, + "outputs": [], + "source": [ + "[display(r) for r in results]" + ] + }, + { + "cell_type": "markdown", + "id": "810da59e", + "metadata": { + "tags": [] + }, + "source": [ + "## Use Direct-Access to open, load and display data stored on S3\n", + "\n", + "Direct-access to data from an S3 bucket is a two-step process. First, the files are opened using the `open` method. The `auth` object created at the start of the notebook is used to provide Earthdata Login authentication and AWS credentials.\n", + "\n", + "The next step is to load the data. In this case, data are loaded into an `xarray.Dataset`. Data could be read into `numpy` arrays or a `pandas.DataFrame`. However, each granule would have to be read using a package that reads HDF5 granules such as `h5py`. `xarray` does this all _under-the-hood_ in a single line but for a single group in the HDF5 granule*.\n", + "\n", + "*ICESat-2 measures photon returns from 3 beam pairs numbered 1, 2 and 3 that each consist of a left and a right beam. In this case, we are interested in the left ground track (gt) of beam pair 1. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11205bbb", + "metadata": {}, + "outputs": [], + "source": [ + "files = earthaccess.open(results)\n", + "ds = xr.open_dataset(files[1], group='/gt1l/land_ice_segments')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75881751", + "metadata": {}, + "outputs": [], + "source": [ + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "1282ce34", + "metadata": {}, + "source": [ + "`hvplot` is an interactive plotting tool that is useful for exploring data."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be7386c3", + "metadata": {}, + "outputs": [], + "source": [ + "ds['h_li'].hvplot(kind='scatter', s=2)" + ] + }, + { + "cell_type": "markdown", + "id": "0a4335c8", + "metadata": {}, + "source": [ + "## **3. Learning outcomes recap**\n", + "\n", + "We have learned how to:\n", + "1. use `earthaccess` to search for ICESat-2 data using spatial and temporal filters and explore search results;\n", + "2. open data granules using direct access to the ICESat-2 S3 bucket;\n", + "3. load a HDF5 group into an xarray.Dataset;\n", + "4. visualize the land ice heights using hvplot." + ] + }, + { + "cell_type": "markdown", + "id": "317ea6bd", + "metadata": {}, + "source": [ + "## **4. Additional resources**\n", + "\n", + "For general information about NSIDC DAAC data in the Earthdata Cloud: \n", + "\n", + "[FAQs About NSIDC DAAC's Earthdata Cloud Migration](https://nsidc.org/data/user-resources/help-center/faqs-about-nsidc-daacs-earthdata-cloud-migration)\n", + "\n", + "[NASA Earthdata Cloud Data Access Guide](https://nsidc.org/data/user-resources/help-center/nasa-earthdata-cloud-data-access-guide)\n", + "\n", + "Additional tutorials and How Tos:\n", + "\n", + "[NASA Earthdata Cloud Cookbook](https://nasa-openscapes.github.io/earthdata-cloud-cookbook/)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a67c7eb", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/tutorials_overview.qmd b/tutorials/tutorials_overview.qmd new file mode 100644 
index 0000000..565827c --- /dev/null +++ b/tutorials/tutorials_overview.qmd @@ -0,0 +1,16 @@ +--- +title: "Tutorials / Workshops" +--- + +This section of the cookbook contains longer form tutorials providing examples of data search and access within a workflow. NSIDC is frequently involved in hackweeks and workshops as well. Content from past events can be found in the [NSIDC Data Tutorials Repository](https://github.com/nsidc/NSIDC-Data-Tutorials). + +**Current cookbook tutorials:** + +- [Direct cloud access of ICESat-2 (*ATL06 Land Ice Height*) data](/tutorials/ATL06-direct-access.ipynb): a notebook that demonstrates searching for cloud-hosted ICESat-2 data and directly accessing Land Ice Height (ATL06) granules from an Amazon Compute Cloud (EC2) instance using the `earthaccess` package. +- [Download of ICESat-2 (*ATL07 Sea Ice*) data](/tutorials/working_with_icesat2_sea_ice_data.ipynb): a notebook that demonstrates searching for and accessing ICESat-2 data using the Python `earthaccess` package, reading and visualizing the data using `xarray` and `pandas`, and that uses `matplotlib` and `cartopy` to produce a map of search results. + +**Tutorials coming soon:** + +- Ideas? +- *Let us know what you'd like to see!* + diff --git a/tutorials/working_with_icesat2_sea_ice_data.ipynb b/tutorials/working_with_icesat2_sea_ice_data.ipynb new file mode 100644 index 0000000..784561b --- /dev/null +++ b/tutorials/working_with_icesat2_sea_ice_data.ipynb @@ -0,0 +1,14127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b693cb9e-7692-4d65-9688-8ec7b1636e3f", + "metadata": {}, + "source": [ + "# Working with ICESat-2 Sea Ice Products\n", + "\n", + "## Overview\n", + "\n", + "In this notebook, we demonstrate searching for and accessing ICESat-2 data using the Python `earthaccess` package, and reading and visualizing the data using `xarray` and `pandas`. We also use `matplotlib` and `cartopy` to produce a map of search results. 
\n", + "\n", + "`earthaccess` is a community-developed, open-source Python package to streamline programmatic search and access for NASA data archives. Users can find data sets and data granules, and either download or \"stream\" NASA data in as little as three \"lines of code\", regardless of whether users are working in the cloud or on a local machine. The `earthaccess` package handles authentication for NASA Earthdata Login and the AWS-hosted NASA Earthdata cloud. All you need is an Earthdata Login.\n", + "\n", + "`xarray` has become the go-to Python package for Earth Data Science. With v2024.10.0, `xarray` can be used to read and work with data stored in hierarchical file structures like the HDF5 file format used for ICESat-2, using the [`DataTree`](https://xarray.dev/blog/datatree) structure. We use [`xarray.DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html#xarray.DataTree) to read and explore ICESat-2 files.\n", + "\n", + "Although `xarray` could be used to work with the ICESat-2 data, the nested-group structure can be a little cumbersome. So we create a `pandas.DataFrame` object for a subset of data to make plotting easier.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "cebf284d-e1ca-4602-8bf1-d55718675710", + "metadata": {}, + "source": [ + "## Import libraries\n", + "\n", + "As with all Python, we import the libraries we will use." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6c2dc3a3-b0a9-4fcb-a1ba-d75a9dc2c8d0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mibe9765/miniforge3/envs/nsidc-tutorial-icesat2-apps/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets.
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "earthaccess: 0.14.0\n", + "xarray: 2025.7.1\n", + "cartopy: 0.25.0\n" + ] + } + ], + "source": [ + "# Search for data\n", + "import earthaccess\n", + "\n", + "# Read and work with data\n", + "import xarray as xr\n", + "import pandas as pd\n", + "\n", + "# To plot results\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import ListedColormap, BoundaryNorm\n", + "from matplotlib.lines import Line2D\n", + "\n", + "# To plot map of results\n", + "import cartopy\n", + "import cartopy.crs as ccrs\n", + "import cartopy.feature as cfeature\n", + "\n", + "# Check package versions: you may want to update if you have older versions\n", + "# See README.md\n", + "print(f\"earthaccess: {earthaccess.__version__}\")\n", + "print(f\"xarray: {xr.__version__}\")\n", + "print(f\"cartopy: {cartopy.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "id": "79c94fa1-0b8f-476d-96eb-e5a70cd0ac81", + "metadata": {}, + "source": [ + "## Authenticate\n", + "\n", + "Although you do not need an Earthdata login to search for NASA data, you do need one to access that data. It is better just to log in at the start of a workflow so you don't forget.\n", + "\n", + "You will need an Earthdata login. If you don't have one, you can register for one, for free, [here](https://urs.earthdata.nasa.gov/users/new).\n", + "\n", + "`earthaccess` will prompt for your Earthdata login username and password. You can also set up a `.netrc` file or environment variables. `earthaccess` will search for these alternatives before prompting for a username and password. See the `earthaccess` [documentation](https://earthaccess.readthedocs.io/en/latest/user_guide/authenticate/) to learn how to do this."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "293c6a1e-b7a5-4c60-beab-e34de3ca7399", + "metadata": {}, + "outputs": [], + "source": [ + "auth = earthaccess.login()" + ] + }, + { + "cell_type": "markdown", + "id": "bbf8a2fe-cf35-4d9d-87d7-655a2121e7dd", + "metadata": {}, + "source": [ + "## Search for ICESat-2 Related Datasets\n", + "\n", + "Before we search for data, we want to know what ICESat-2 datasets and what versions of these datasets are available. We will also need to know the `short-name` or `concept-id` of the ICESat-2 dataset we want to use.\n", + "\n", + "The `short-name` can be found on the dataset landing pages for products or we can search for it.\n", + "\n", + "To search for datasets (or Collections as NASA calls them), we use the `search_datasets` method. This allows searches by keywords, platform, time range, spatial extent, version, and whether data are hosted in the cloud or still archived at a NASA DAAC.\n", + "\n", + "Here, we will do a simple search using `platform` for ICESat-2 data. The `platform` and `keyword` searches are not case sensitive. We'll add `downloadable=True` and `cloud_hosted=True` to further refine the search." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "63022c99-7ff9-41ef-8ac0-774f9481fc10", + "metadata": {}, + "outputs": [], + "source": [ + "results = earthaccess.search_datasets(\n", + " platform=\"icesat-2\",\n", + " downloadable=True,\n", + " cloud_hosted=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "cecd5fd1-ba0f-4c05-a821-f8e4a07132e0", + "metadata": {}, + "source": [ + "`search_datasets` returns a Python List of data collections. We can find how many datasets were found by getting the length of that list using `len`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4ef29d21-ad9e-4dfd-8a16-c5055c002f13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "47" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(results)" + ] + }, + { + "cell_type": "markdown", + "id": "70a2bd3a-4de3-478d-8f93-3cdb526ec528", + "metadata": {}, + "source": [ + "There are 47 datasets. Because `results` is a list, we can access any element of that list by giving an index. Here, we'll access the first element (`0`). Just change the index to see a different dataset. \n", + "\n", + "Each data collection has a `summary` method that returns a Python dictionary containing `short-name`, `concept-id`, and `version`, along with information about the file type and links to get the data. The file links are used by earthaccess, so we don't need to worry about these too much." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ecac52e9-9753-46bb-b748-d49750d2548b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'short-name': 'ATL03',\n", + " 'concept-id': 'C2596864127-NSIDC_CPRD',\n", + " 'version': '006',\n", + " 'file-type': \"[{'FormatType': 'Native', 'Format': 'HDF5', 'FormatDescription': 'HTTPS'}]\",\n", + " 'get-data': ['https://search.earthdata.nasa.gov/search/granules?p=C2596864127-NSIDC_CPRD',\n", + " 'https://cmr.earthdata.nasa.gov/virtual-directory/collections/C2596864127-NSIDC_CPRD',\n", + " 'https://nsidc.org/data/data-access-tool/ATL03/versions/6/'],\n", + " 'cloud-info': {'Region': 'us-west-2',\n", + " 'S3BucketAndObjectPrefixNames': ['nsidc-cumulus-prod-protected/ATLAS/ATL03/006',\n", + " 'nsidc-cumulus-prod-public/ATLAS/ATL03/006'],\n", + " 'S3CredentialsAPIEndpoint': 'https://data.nsidc.earthdatacloud.nasa.gov/s3credentials',\n", + " 'S3CredentialsAPIDocumentationURL': 'https://data.nsidc.earthdatacloud.nasa.gov/s3credentialsREADME'}}" + ] + }, + 
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results[0].summary()" + ] + }, + { + "cell_type": "markdown", + "id": "34ab719f-024e-46ed-9045-afa1aa7d5d25", + "metadata": {}, + "source": [ + "We also want to be able to see all the other datasets available. Because there are a lot of datasets, we'll just get the `short-name` and `version`.\n", + "\n", + "We'll use a Python _list comprehension_, which is like a for-loop, to extract the information we want. We use the `sorted` function to sort the list into alphabetical order using the `short-name` (the first element of the _tuple_) as a key." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "19b2366c-a0e6-4dec-aa4b-3024501d9f4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('ATL02', '007'),\n", + " ('ATL02', '006'),\n", + " ('ATL03', '006'),\n", + " ('ATL03', '007'),\n", + " ('ATL04', '007'),\n", + " ('ATL04', '006'),\n", + " ('ATL06', '006'),\n", + " ('ATL06', '007'),\n", + " ('ATL07', '006'),\n", + " ('ATL07', '007'),\n", + " ('ATL07QL', '007'),\n", + " ('ATL08', '006'),\n", + " ('ATL08', '007'),\n", + " ('ATL08QL', '007'),\n", + " ('ATL08QL', '006'),\n", + " ('ATL09', '006'),\n", + " ('ATL09', '007'),\n", + " ('ATL09QL', '007'),\n", + " ('ATL10', '006'),\n", + " ('ATL10', '007'),\n", + " ('ATL10QL', '007'),\n", + " ('ATL11', '006'),\n", + " ('ATL12', '006'),\n", + " ('ATL12', '007'),\n", + " ('ATL13', '006'),\n", + " ('ATL13', '007'),\n", + " ('ATL13QL', '007'),\n", + " ('ATL14', '004'),\n", + " ('ATL15', '004'),\n", + " ('ATL16', '005'),\n", + " ('ATL17', '005'),\n", + " ('ATL19', '003'),\n", + " ('ATL20', '004'),\n", + " ('ATL21', '003'),\n", + " ('ATL22', '003'),\n", + " ('ATL23', '001'),\n", + " ('ATL24', '001'),\n", + " ('Boreal_AGB_Density_ICESat2_2186', '1'),\n", + " ('CMS_Global_Forest_AGC_2180', '1'),\n", + " ('GEDI_ICESAT2_Global_Veg_Height_2294', '1'),\n", + " ('IS2ATBABD', '1'),\n", + " ('IS2CHM', 
'1'),\n", + " ('IS2GZANT', '1'),\n", + " ('IS2MPDDA', '3'),\n", + " ('IS2SITDAT4', '001'),\n", + " ('IS2SITMOGR4', '3'),\n", + " ('NSIDC-0782', '1')]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted(\n", + " [(r.summary()[\"short-name\"], r.summary()[\"version\"]) for r in results], \n", + " key=lambda x: x[0]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ebe4a069-5ca2-45ab-bd8b-bc5140fc6ed9", + "metadata": {}, + "source": [ + "The datasets with `short-names` that start with ATL are the standard ICESat-2 products. Some of these `short-names` have `QL` at the end. These are quick-look products. Also, most products have two versions. This is because the two most recent versions are archived." + ] + }, + { + "cell_type": "markdown", + "id": "b1452be2-1a0c-4b23-a35a-654817d75875", + "metadata": {}, + "source": [ + "## Search for ATL07 data granules\n", + "\n", + "Now that we know the product short_name, we can search for data. Here, we are interested in granules that were collected during the validation campaign. There was an underflight of ICESat-2 over sea ice on 26 July 2022, so we will search for ATL07 data for that date.\n", + "\n", + "To search for data, we use `earthaccess.search_data`. There are many ways to construct a search. Some examples are below.\n", + "\n", + "Currently, processing of ATL07 and ATL10 has been halted because of some issues with input data, so only version 006 is available." + ] + }, + { + "cell_type": "markdown", + "id": "56c86d4d-5617-4d65-8176-e4cde9c8abbf", + "metadata": {}, + "source": [ + "### By temporal range\n", + "\n", + "Searching using the `temporal` filter with `short-name` and `version` will return all data granules within the time range specified. \n", + "\n", + "The `temporal` keyword expects a tuple with two date-like variables. These can be strings following the format `YYYY-MM-DD` or `datetime` objects. 
Because we only want one day of data, the dates are the same.\n", + "\n", + "As with the datasets `results`, `search_data` returns a Python List so we can find the number of granules returned using `len`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0a9a6d1e-04b3-4b6c-b924-418dce9e4b22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "62" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "granules = earthaccess.search_data(\n", + " short_name=\"ATL07\",\n", + " temporal=(\"2022-07-26\",\"2022-07-26\"),\n", + " version=\"006\",\n", + ")\n", + "\n", + "len(granules)" + ] + }, + { + "cell_type": "markdown", + "id": "f8f91810-8db0-41e3-b808-5c73c084a129", + "metadata": {}, + "source": [ + "In a Jupyter notebook, we can get a rendering of information about a single granule, including some thumbnails of the location and data just by running a code-cell with one granule result." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "11440ca7-8648-43b9-b21b-b10541b30a01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "

Data: ATL07-02_20220725225403_05201601_006_02.h5

\n", + "

Size: 68.2 MB

\n", + "

Cloud Hosted: True

\n", + "
\n", + "
\n", + " \"Data\"Data\n", + "
\n", + "
\n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + "Collection: {'EntryTitle': 'ATLAS/ICESat-2 L3A Sea Ice Height V006'}\n", + "Spatial coverage: {'HorizontalSpatialDomain': {'Orbit': {'AscendingCrossing': -18.55622198275336, 'StartLatitude': -27.0, 'StartDirection': 'D', 'EndLatitude': -27.0, 'EndDirection': 'A'}}}\n", + "Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2022-07-25T23:57:58.738Z', 'EndingDateTime': '2022-07-26T00:10:54.919Z'}}\n", + "Size(MB): 68.20199012756348\n", + "Data: ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL07/006/2022/07/25/ATL07-02_20220725225403_05201601_006_02.h5']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "granules[0]" + ] + }, + { + "cell_type": "markdown", + "id": "de449287-f5fb-4a63-bb9d-429e5d76ee72", + "metadata": {}, + "source": [ + "Or we can print a summary." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d7f2e854-bd9e-4679-b3e4-3d259b0a8cb5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collection: {'EntryTitle': 'ATLAS/ICESat-2 L3A Sea Ice Height V006'}\n", + "Spatial coverage: {'HorizontalSpatialDomain': {'Orbit': {'AscendingCrossing': -18.55622198275336, 'StartLatitude': -27.0, 'StartDirection': 'D', 'EndLatitude': -27.0, 'EndDirection': 'A'}}}\n", + "Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2022-07-25T23:57:58.738Z', 'EndingDateTime': '2022-07-26T00:10:54.919Z'}}\n", + "Size(MB): 68.20199012756348\n", + "Data: ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL07/006/2022/07/25/ATL07-02_20220725225403_05201601_006_02.h5']\n" + ] + } + ], + "source": [ + "print(granules[0])" + ] + }, + { + "cell_type": "markdown", + "id": "3f34fcaa-4d73-4051-b535-2a9f594c5135", + "metadata": {}, + "source": [ + "We can list those granules in a similar way as we did with the results from `search_datasets`. 
We need to know a little about the structure of the granule results. Here, we print the list of granule file names." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b60179aa-9902-4fb7-a655-81b16d7dc9ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ATL07-02_20220725225403_05201601_006_02.h5',\n", + " 'ATL07-01_20220726002820_05211601_006_02.h5',\n", + " 'ATL07-02_20220726002820_05211601_006_02.h5',\n", + " 'ATL07-01_20220726020237_05221601_006_02.h5',\n", + " 'ATL07-02_20220726020237_05221601_006_02.h5',\n", + " 'ATL07-01_20220726033654_05231601_006_02.h5',\n", + " 'ATL07-02_20220726033654_05231601_006_02.h5',\n", + " 'ATL07-01_20220726051112_05241601_006_02.h5',\n", + " 'ATL07-02_20220726051112_05241601_006_02.h5',\n", + " 'ATL07-01_20220726064529_05251601_006_02.h5',\n", + " 'ATL07-02_20220726064529_05251601_006_02.h5',\n", + " 'ATL07-01_20220726081946_05261601_006_02.h5',\n", + " 'ATL07-02_20220726081946_05261601_006_02.h5',\n", + " 'ATL07-01_20220726095404_05271601_006_02.h5',\n", + " 'ATL07-02_20220726095404_05271601_006_02.h5',\n", + " 'ATL07-01_20220726112821_05281601_006_02.h5',\n", + " 'ATL07-02_20220726112821_05281601_006_02.h5',\n", + " 'ATL07-01_20220726130238_05291601_006_02.h5',\n", + " 'ATL07-02_20220726130238_05291601_006_02.h5',\n", + " 'ATL07-01_20220726143655_05301601_006_02.h5',\n", + " 'ATL07-02_20220726143655_05301601_006_02.h5',\n", + " 'ATL07-01_20220726161113_05311601_006_02.h5',\n", + " 'ATL07-02_20220726161113_05311601_006_02.h5',\n", + " 'ATL07-01_20220726174530_05321601_006_02.h5',\n", + " 'ATL07-02_20220726174530_05321601_006_02.h5',\n", + " 'ATL07-01_20220726191947_05331601_006_02.h5',\n", + " 'ATL07-02_20220726191947_05331601_006_02.h5',\n", + " 'ATL07-01_20220726205405_05341601_006_02.h5',\n", + " 'ATL07-02_20220726205405_05341601_006_02.h5',\n", + " 'ATL07-01_20220726222822_05351601_006_02.h5',\n", + " 'ATL07-02_20220726222822_05351601_006_02.h5',\n", + " 
'SC:ATL07.006:274797041',\n", + " 'SC:ATL07.006:274810578',\n", + " 'SC:ATL07.006:274797086',\n", + " 'SC:ATL07.006:274810723',\n", + " 'SC:ATL07.006:274797043',\n", + " 'SC:ATL07.006:274810584',\n", + " 'SC:ATL07.006:274797009',\n", + " 'SC:ATL07.006:274798512',\n", + " 'SC:ATL07.006:274797045',\n", + " 'SC:ATL07.006:274798540',\n", + " 'SC:ATL07.006:274797028',\n", + " 'SC:ATL07.006:274798490',\n", + " 'SC:ATL07.006:274797109',\n", + " 'SC:ATL07.006:274798654',\n", + " 'SC:ATL07.006:274780570',\n", + " 'SC:ATL07.006:274798624',\n", + " 'SC:ATL07.006:274780664',\n", + " 'SC:ATL07.006:274798632',\n", + " 'SC:ATL07.006:274780446',\n", + " 'SC:ATL07.006:274798621',\n", + " 'SC:ATL07.006:274780413',\n", + " 'SC:ATL07.006:274798610',\n", + " 'SC:ATL07.006:274780610',\n", + " 'SC:ATL07.006:274798685',\n", + " 'SC:ATL07.006:274780646',\n", + " 'SC:ATL07.006:274798665',\n", + " 'SC:ATL07.006:274780792',\n", + " 'SC:ATL07.006:274778756',\n", + " 'SC:ATL07.006:274780724',\n", + " 'SC:ATL07.006:274778728',\n", + " 'SC:ATL07.006:274780593']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[g['umm']['GranuleUR'] for g in granules]" + ] + }, + { + "cell_type": "markdown", + "id": "b636ffce-c447-4785-a263-1cb2f062813a", + "metadata": {}, + "source": [ + "### Search By `bounding_box`\n", + "\n", + "We can further refine the search by adding a `bounding_box`. The coordinates of the bounding box are latitudes and longitudes in WGS84. The `bounding_box` is a tuple with `(min_lon, min_lat, max_lon, max_lat)`.\n", + "\n", + "Here, we search for ATL07 files in the Arctic." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "24396504-3930-4d27-9a96-f2bf0abd3772", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "granules = earthaccess.search_data(\n", + " short_name=\"ATL07\",\n", + " temporal=(\"2022-07-26\",\"2022-07-26\"),\n", + " version=\"006\",\n", + " bounding_box=(-180., 60., 180., 90.), # To restrict to N.Hem. only\n", + ")\n", + "len(granules)" + ] + }, + { + "cell_type": "markdown", + "id": "f7278212-8dd7-45bc-a0d2-66abd157fd27", + "metadata": {}, + "source": [ + "### Search By Polygon\n", + "\n", + "Searching by bounding-box does not always make sense in the Arctic, where meridians are converging. Defining a polygon might be more useful. \n", + "\n", + "The polygon argument is a Python List of longitude, latitude pairs, with the last pair of points matching the first point. For example:\n", + "\n", + "```\n", + "[(lon0,lat0), (lon1,lat1), (lon2,lat2), (lon3,lat3), (lon0,lat0)]\n", + "```\n", + "\n", + "The points have to be in counter-clockwise order. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a78e3efa-f652-47cd-bda8-98e8d447080e", + "metadata": {}, + "outputs": [], + "source": [ + "latp = [84, 85, 86.5, 85, 84]\n", + "lonp = [-80, -100, -100, -60, -80]\n", + "poly = [(x,y) for x, y in zip(lonp[::-1],latp[::-1])]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f606fe55-0edf-4859-bb99-e32569eabfe2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "granules = earthaccess.search_data(\n", + " short_name=\"ATL07\",\n", + " temporal=(\"2022-07-26\",\"2022-07-26\"),\n", + " version=\"006\",\n", + " polygon=poly,\n", + ")\n", + "len(granules)" + ] + }, + { + "cell_type": "markdown", + "id": "2900d86e-7673-4107-8c53-882a5cdce0dd", + "metadata": {}, + "source": [ + "This returns 8 results: 4 with `.h5` file names and 4 with `SC:` identifiers, as the next cell shows." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ecbf4359-6a9a-4172-93ff-45cfb1a67bc6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ATL07-01_20220726002820_05211601_006_02.h5',\n", + " 'ATL07-01_20220726020237_05221601_006_02.h5',\n", + " 'ATL07-01_20220726161113_05311601_006_02.h5',\n", + " 'ATL07-01_20220726174530_05321601_006_02.h5',\n", + " 'SC:ATL07.006:274810578',\n", + " 'SC:ATL07.006:274810723',\n", + " 'SC:ATL07.006:274798610',\n", + " 'SC:ATL07.006:274798685']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[g['umm']['GranuleUR'] for g in granules]" + ] + }, + { + "cell_type": "markdown", + "id": "c4774a48-c113-4b32-a435-42fa5c89c5d3", + "metadata": {}, + "source": [ + "## Search for a particular RGT\n", + "\n", + "ICESat-2 data are often referenced by Reference Ground Tracks (RGTs). 
An RGT is the imaginary line traced on the surface of the Earth as ICESat-2 passes overhead. There are 1387 RGTs. 
Each RGT is followed once in every 91-day orbit cycle. RGTs in different cycles are distinguished by a two-digit cycle number. This information is in the file metadata and also encoded in the file name.\n", + "\n", + "`ATL07-[HH]_[yyyymmdd][hhmmss]_[ttttccss]_[vvv_rr].h5`\n", + "\n", + "where `tttt` is the four-digit RGT and `cc` is the cycle number.\n", + "\n", + "Below, we filter the granules to get RGT `0531` by splitting the filename on `_` and then looking for the third group of characters (index 2) that starts with `0531`. Names without that group, such as `SC:ATL07.006:274810578`, are skipped." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4ef8e3c-1eeb-432a-939f-6e07459d63ab", + "metadata": {}, + "outputs": [], + "source": [
+ "# Names such as 'SC:ATL07.006:274810578' contain no '_', so make sure\n", + "# index 2 exists before testing it; otherwise split('_')[2] raises IndexError.\n", + "g = [\n", + "    g for g in granules\n", + "    if len(g[\"umm\"]['GranuleUR'].split('_')) > 2\n", + "    and g[\"umm\"]['GranuleUR'].split('_')[2].startswith('0531')\n", + "]\n", + "g" + ] + }, + { + "cell_type": "markdown", + "id": "7a792527-54c4-4899-9b41-184d07c6ff47", + "metadata": {}, + "source": [ + "### Search by Granule File Name\n", + "\n", + "We can also search for a particular granule. We still need to provide `short_name` or `concept_id` because CMR does not allow searching across collections." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "18313ec2-1a1b-424e-b9a9-50bac6d63582", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Collection: {'EntryTitle': 'ATLAS/ICESat-2 L3A Sea Ice Height V006'}\n", + "Spatial coverage: {'HorizontalSpatialDomain': {'Orbit': {'AscendingCrossing': 81.6295895841921, 'StartLatitude': 27.0, 'StartDirection': 'A', 'EndLatitude': 27.0, 'EndDirection': 'D'}}}\n", + "Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2022-07-26T16:28:32.700Z', 'EndingDateTime': '2022-07-26T16:39:56.875Z'}}\n", + "Size(MB): 107.49841690063477\n", + "Data: ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL07/006/2022/07/26/ATL07-01_20220726161113_05311601_006_02.h5']]\n" + ] + } + ], + "source": [ + "granules = earthaccess.search_data(\n", + " short_name=\"ATL07\",\n", + " granule_ur=\"ATL07-01_20220726161113_05311601_006_02.h5\",\n", + ")\n", + "print(granules)" + ] + }, + { + "cell_type": "markdown", + "id": "c67c9c70-9532-42f4-ae0e-d489c1c4dfb7", + "metadata": {}, + "source": [ + "## Download the data\n", + "\n", + "We can either download data to our local machine or stream data directly into memory. 
Streaming data works well in the cloud.\n", + "\n", + "Below we download data. To stream data, we use the `earthaccess.open` method.\n", + "\n", + "```\n", + "files = earthaccess.open(granules)\n", + "```\n", + "\n", + "For `earthaccess.download`, files are downloaded to our current working directory or the directory specified in `local_path`. 
A list of the paths to these local files is returned.\n", + "\n", + "If we use `earthaccess.open`, `files` is a list of file-like objects." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dde9d3eb-da94-4222-abc1-a2f953839d84", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "QUEUEING TASKS | : 100%|█████████████████████████| 1/1 [00:00<00:00, 365.10it/s]\n", + "PROCESSING TASKS | : 100%|████████████████████████| 1/1 [00:05<00:00, 5.39s/it]\n", + "COLLECTING RESULTS | : 100%|████████████████████| 1/1 [00:00<00:00, 6887.20it/s]\n" + ] + } + ], + "source": [ + "files = earthaccess.download(granules, local_path=\"./data\")" + ] + }, + { + "cell_type": "markdown", + "id": "f38f6a33-de8c-4abc-a750-6eaa633f918d", + "metadata": {}, + "source": [ + "## Read datafile using `xarray`\n", + "\n", + "We'll use `xarray.open_datatree` to open the file. Whether we use `earthaccess.download` or `earthaccess.open`, the list of file paths or file-like objects in `files` can be passed to `xarray` file readers. Currently, `xarray.open_datatree` will only open a single file, so we have to index `files`. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "240ff452-c0eb-4b76-acd4-d9335163e403", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'data/ATL07-01_20220726161113_05311601_006_02.h5'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "files[0]" + ] + }, + { + "cell_type": "markdown", + "id": "a328e17b-5e75-4961-bc73-cab0be4d59cf", + "metadata": {}, + "source": [ + "`decode_timedelta=True` is set so that we don't get a warning." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "25355435-0f98-47d2-9ba9-a8be77f40778", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DatasetView> Size: 0B\n",
+       "Dimensions:  ()\n",
+       "Data variables:\n",
+       "    *empty*\n",
+       "Attributes: (12/47)\n",
+       "    short_name:                         ATL07\n",
+       "    level:                              L3A\n",
+       "    title:                              SET_BY_META\n",
+       "    description:                        The data set (ATL07) contains along-t...\n",
+       "    Conventions:                        CF-1.6\n",
+       "    contributor_name:                   Ron Kwok (rkwok01@uw.edu), Alek Petty...\n",
+       "    ...                                 ...\n",
+       "    processing_level:                   2A\n",
+       "    references:                         http://nsidc.org/data/icesat2/data.html\n",
+       "    project:                            ICESat-2 > Ice, Cloud, and land Eleva...\n",
+       "    instrument:                         ATLAS > Advanced Topographic Laser Al...\n",
+       "    platform:                           ICESat-2 > Ice, Cloud, and land Eleva...\n",
+       "    source:                             Spacecraft
" + ], + "text/plain": [ + "\n", + "Group: /\n", + "│ Attributes: (12/47)\n", + "│ short_name: ATL07\n", + "│ level: L3A\n", + "│ title: SET_BY_META\n", + "│ description: The data set (ATL07) contains along-t...\n", + "│ Conventions: CF-1.6\n", + "│ contributor_name: Ron Kwok (rkwok01@uw.edu), Alek Petty...\n", + "│ ... ...\n", + "│ processing_level: 2A\n", + "│ references: http://nsidc.org/data/icesat2/data.html\n", + "│ project: ICESat-2 > Ice, Cloud, and land Eleva...\n", + "│ instrument: ATLAS > Advanced Topographic Laser Al...\n", + "│ platform: ICESat-2 > Ice, Cloud, and land Eleva...\n", + "│ source: Spacecraft\n", + "├── Group: /ancillary_data\n", + "│ │ Dimensions: (phony_dim_25: 1)\n", + "│ │ Dimensions without coordinates: phony_dim_25\n", + "│ │ Data variables: (12/25)\n", + "│ │ atlas_sdp_gps_epoch (phony_dim_25) datetime64[ns] 8B ...\n", + "│ │ control (phony_dim_25) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'sc_orient' (sc_orient_time: 1)> Size: 1B\n",
+       "[1 values with dtype=int8]\n",
+       "Coordinates:\n",
+       "  * sc_orient_time  (sc_orient_time) datetime64[ns] 8B 2022-07-25T23:30:00\n",
+       "Attributes:\n",
+       "    long_name:      Spacecraft Orientation\n",
+       "    units:          1\n",
+       "    source:         POD/PPD\n",
+       "    valid_min:      0\n",
+       "    valid_max:      2\n",
+       "    contentType:    referenceInformation\n",
+       "    description:    This parameter tracks the spacecraft orientation between ...\n",
+       "    flag_meanings:  backward forward transition\n",
+       "    flag_values:    [0 1 2]
" + ], + "text/plain": [ + " Size: 1B\n", + "[1 values with dtype=int8]\n", + "Coordinates:\n", + " * sc_orient_time (sc_orient_time) datetime64[ns] 8B 2022-07-25T23:30:00\n", + "Attributes:\n", + " long_name: Spacecraft Orientation\n", + " units: 1\n", + " source: POD/PPD\n", + " valid_min: 0\n", + " valid_max: 2\n", + " contentType: referenceInformation\n", + " description: This parameter tracks the spacecraft orientation between ...\n", + " flag_meanings: backward forward transition\n", + " flag_values: [0 1 2]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt[\"orbit_info\"][\"sc_orient\"]" + ] + }, + { + "cell_type": "markdown", + "id": "69231947-8c32-4c62-991c-317e8b383c88", + "metadata": {}, + "source": [ + "`sc_orient` is `1`, so the spacecraft is in the forward orientation. Left beams are weak and right beams are strong." + ] + }, + { + "cell_type": "markdown", + "id": "10259343-78a6-4ba4-80ac-99ad27a50923", + "metadata": {}, + "source": [ + "We will work with the first strong beam \"GT1R\".\n", + "\n", + "The datatree structure is a little cumbersome, and for this demonstration we only want a few variables, so we will load the data into a `pandas.DataFrame`.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c6d8ee24-3cef-4926-821f-25634cf7818e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
distanceheightlatitudessh_flagsurface_typequality
07.378386e+06NaN66.172199010
17.379108e+06NaN66.178631010
27.379192e+06NaN66.179382010
37.379263e+06NaN66.180009010
47.379278e+06NaN66.180148010
.....................
904411.223160e+07NaN70.277541010
904421.223179e+07NaN70.275878010
904431.223203e+07NaN70.273740010
904441.223227e+07NaN70.271634010
904451.223251e+07NaN70.269436010
\n", + "

90446 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " distance height latitude ssh_flag surface_type quality\n", + "0 7.378386e+06 NaN 66.172199 0 1 0\n", + "1 7.379108e+06 NaN 66.178631 0 1 0\n", + "2 7.379192e+06 NaN 66.179382 0 1 0\n", + "3 7.379263e+06 NaN 66.180009 0 1 0\n", + "4 7.379278e+06 NaN 66.180148 0 1 0\n", + "... ... ... ... ... ... ...\n", + "90441 1.223160e+07 NaN 70.277541 0 1 0\n", + "90442 1.223179e+07 NaN 70.275878 0 1 0\n", + "90443 1.223203e+07 NaN 70.273740 0 1 0\n", + "90444 1.223227e+07 NaN 70.271634 0 1 0\n", + "90445 1.223251e+07 NaN 70.269436 0 1 0\n", + "\n", + "[90446 rows x 6 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\n", + " \"distance\": dt[\"gt1r\"][\"sea_ice_segments\"][\"seg_dist_x\"].values,\n", + " \"height\": dt[\"gt1r\"][\"sea_ice_segments\"][\"heights\"][\"height_segment_height\"].values,\n", + " \"latitude\": dt[\"gt1r\"][\"sea_ice_segments\"][\"latitude\"].values,\n", + " \"ssh_flag\": dt[\"gt1r\"][\"sea_ice_segments\"][\"heights\"][\"height_segment_ssh_flag\"].values,\n", + " \"surface_type\": dt[\"gt1r\"][\"sea_ice_segments\"][\"heights\"][\"height_segment_type\"].values,\n", + " \"quality\": dt[\"gt1r\"][\"sea_ice_segments\"][\"heights\"][\"height_segment_quality\"].values,\n", + " }\n", + ")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "a82e8314-2424-4416-9950-636f8cbc0aae", + "metadata": {}, + "source": [ + "## Plot the data" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "28def223-3c4f-46d9-8b45-006a5de6c15e", + "metadata": {}, + "outputs": [], + "source": [ + "# 0 - cloud\n", + "# 1 - snow/ice\n", + "# 2 - 5 Specular Lead\n", + "# 6 - 9 Dark Lead\n", + "surface = [\"Cloud\", \"Snow/Ice\", \"Specular Lead\", \"Dark Lead\"]\n", + "colors = [\"grey\", \"darkorange\", \"cyan\", \"darkblue\"]\n", + "bounds = [-0.5, .5, 1.5, 5.5, 9.5]\n", + "surface_type_cmap = ListedColormap(colors)\n", + 
"surface_type_norm = BoundaryNorm(boundaries=bounds, ncolors=4)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "dfa97406-846a-49d7-b30d-6ff35418b522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "m = df.plot.scatter(x=\"distance\", y=\"height\", c=\"surface_type\", \n", + " s=3, cmap=surface_type_cmap, norm=surface_type_norm,\n", + " colorbar=False, #)\n", + " xlim=(1.04e7,1.0405e7))\n", + "\n", + "handles = [Line2D([0], [0], linestyle='none', marker='o',\n", + " markerfacecolor=col, markeredgecolor='none',\n", + " markersize=10, label=sfc) for sfc, col in zip(surface, colors)]\n", + "m.legend(handles=handles)" + ] + }, + { + "cell_type": "markdown", + "id": "ec815ec9-eeac-4259-97a2-412b0cb82f64", + "metadata": {}, + "source": [ + "## Plot ICESat-2 Tracks\n", + "\n", + "It is always helpful to see where the data are located. We plot GT1R track on a map, with the `Polygon` used in `search_data`. This time we use latitude and longitude directly from the `DataTree` object." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e9e2b175-5da8-48ca-a295-29d537c55f0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mibe9765/miniforge3/envs/nsidc-tutorial-icesat2-apps/lib/python3.12/site-packages/cartopy/io/__init__.py:242: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_ocean.zip\n", + " warnings.warn(f'Downloading: {url}', DownloadWarning)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define NSIDC WGS84 North Polar Stereographic projection\n", + "map_projection = ccrs.Stereographic(\n", + " central_latitude=90., \n", + " central_longitude=-45,\n", + " true_scale_latitude=70.,\n", + ")\n", + "extent = [-1000000., 1000000., -2200000., 240000.]\n", + "\n", + "fig = plt.figure(figsize=(7,7))\n", + "ax = fig.add_subplot(projection=map_projection)\n", + "\n", + "ax.set_extent(extent, map_projection)\n", + "\n", + "ax.add_feature(cfeature.OCEAN)\n", + "ax.add_feature(cfeature.LAND)\n", + "\n", + "# Plot polygon\n", + "ax.plot(lonp, latp, transform=ccrs.PlateCarree())\n", + "\n", + "ax.plot(\n", + " dt[\"gt1r\"][\"sea_ice_segments\"][\"longitude\"][::100],\n", + " dt[\"gt1r\"][\"sea_ice_segments\"][\"latitude\"][::100],\n", + " transform=ccrs.PlateCarree(),\n", + ")\n", + "ax.gridlines(draw_labels=True, x_inline=False, y_inline=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8040291-0ca3-4210-a7c4-c39e1a5823df", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}