Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions recipes/antgg-2015/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Name for dataset. User chosen.
title: 'ANTGG-2015'
# Description of dataset. User chosen, roughly 1 sentence in length.
description: 'ANTGG dataset converted to zarr stores from a netCDF file'
# Version of pangeo_forge_recipes library that was used
pangeo_forge_version: '0.10.4'
# The recipes section tells Pangeo Cloud where to find the recipes within your PR.
# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below.
# If your PR contains multiple recipes, you may add additional elements to the list below.
recipes:
# User chosen name for recipe. Likely similar to dataset name, ~25 characters in length
- id: antgg-2015
# The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format <filename>:<object_name>
# <filename> is name of .py file where the Python recipe object is defined.
# For example, if <filename> is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR.
# <object_name> is the name of the recipe object (i.e. Python class instance) _within_ the specified file.
# For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"`
object: recipe:transforms
provenance:
# Data provider object. Follow STAC spec.
# https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object
providers:
- name: 'PANGAEA'
description: 'Data Publisher for Earth & Environmental Science'
roles:
- licensor
url: https://doi.pangaea.de/10.1594/PANGAEA.848168
# This is a required field for provider. Follow STAC spec
# https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license
license: 'CC-BY-3.0'
maintainers:
# Information about recipe creator. name and github are required
- name: 'Matthew Tankersley'
orcid: '0000-0003-4266-8554'
github: mdtanker
# The specific bakery (i.e. cloud infrastructure) that your recipe will run on.
# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries
# bakery:
# id: 'pangeo-ldeo-nsf-earthcube'
62 changes: 62 additions & 0 deletions recipes/antgg-2015/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import apache_beam as beam
import zarr

from pangeo_forge_recipes.patterns import FilePattern
from pangeo_forge_recipes.transforms import (
Indexed,
OpenURLWithFSSpec,
OpenWithXarray,
StoreToZarr,
T,
)


def make_url():
return 'https://hs.pangaea.de/Maps/antgg2015/antgg2015.nc'


pattern = FilePattern(make_url)


def test_ds(store: zarr.storage.FSStore) -> zarr.storage.FSStore:
# This fails integration test if not imported here
import xarray as xr

ds = xr.open_dataset(store, engine='zarr', consolidated=True, chunks={})
for var in [
'ellipsoidal_height',
'orthometric_height',
'free_air_anomaly',
'accuracy_measure',
'bouguer_anomaly',
]:
assert var in ds.data_vars


class Preprocess(beam.PTransform):
"""Preprocessor transform."""

@staticmethod
def _preproc(item: Indexed[T]) -> Indexed[T]:
index, ds = item
ds['x'] = ds.x * 1000
ds['y'] = ds.y * 1000
ds = ds.drop(['longitude', 'latitude', 'crs'])
return index, ds

def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
return pcoll | beam.Map(self._preproc)


transforms = (
beam.Create(pattern.items())
| OpenURLWithFSSpec()
| OpenWithXarray(
file_type=pattern.file_type,
)
| Preprocess()
| StoreToZarr(
store_name='antgg2015.zarr',
)
| 'Test dataset' >> beam.Map(test_ds)
)