Skip to content
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,11 @@ scripts/run_tests.sh

An example deep-link to initiate EDD downloads:

In dev:
In integration (note: for the get-links query param, the `cmr_request_params`
value was url-encoded first, and then the entire get-links URL was url-encoded
again, so the CMR parameters end up double-encoded):

```
earthdata-download://startDownload?getLinks=https://dev.hermes.trst2284.dev.int.nsidc.org/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://dev.hermes.trst2284.dev.int.nsidc.org/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
```

In integration:

```
earthdata-download://startDownload?getLinks=https://integration.nsidc.org/apps/data-access-tool/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
earthdata-download://startDownload?getLinks=https%3A//integration.nsidc.org/apps/data-access-tool/api/get-links%3Fcmr_request_params%3Dprovider%253DNSIDC_CPRD%2526page_size%253D5%2526sort_key%255B%255D%253D-start_date%2526sort_key%255B%255D%253Dproducer_granule_id%2526short_name%253DATL06%2526version%253D6%2526version%253D06%2526version%253D006%2526temporal%255B%255D%253D2018-10-14T00%253A00%253A00Z%252C2025-02-25T00%253A25%253A20Z%2526bounding_box%253D-180%252C-90%252C180%252C90%2526options%255Bproducer_granule_id%255D%255Bpattern%255D%253Dtrue%2526producer_granule_id%255B%255D%253D%252AATL06_2024%252A_0804%252A_006_01.h5%252A&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
```

A button needs to be added to the Data Access Tool that will issue a GET request
Expand Down
60 changes: 31 additions & 29 deletions src/dat_backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,45 +182,47 @@ def post(self) -> Any:
return response


# TODO: re-add this and make use of it. This gives the swagger interface more
# information and helps to document the API.
# GET_LINKS_DOC: Final[frx.model.Model] = api.model(
# "get_links",
# {
# "cmr_request_params": frx.fields.String(
# description="CMR Request parameters as a string",
# example="provider=NSIDC_ECS&page_size=2000&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-19T20:51:37Z&bounding_box=-101.94,57.71,-90.21,61.13",
# required=True,
# ),
# "cursor": frx.fields.String(
# description="CMR search results cursor",
# example='1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203',
# required=False,
# ),
# },
# )
# Response schema for the get-links endpoint, registered with the API under the
# name "GetLinksResponse" and used to marshal the endpoint's output:
#   - links: list of link strings
#   - done: boolean flag
#   - cursor: string (the example shows a JSON-style list of values)
# The `example` values only feed the generated API documentation.
get_links_response_model = api.model(
"GetLinksResponse",
{
"links": frx.fields.List(
frx.fields.String(
example="https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/006/2024/02/09/ATL06_20240209110944_08042201_006_01.h5"
)
),
"done": frx.fields.Boolean(example=False),
"cursor": frx.fields.String(
example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]'
),
},
)


@api.route("/api/get-links")
class GetLinks(frx.Resource): # type: ignore[misc]

@api.response(200, "Success")
# @api.expect(GET_LINKS_DOC) # type: ignore
@api.marshal_with(get_links_response_model, mask=False)
@api.response(*RESPONSE_CODES[200])
@api.response(*RESPONSE_CODES[500])
@api.param(
"cmr_request_params",
description="CMR Request parameters as a string",
example=r"provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=\*ATL06_2024\*_0804\*_006_01.h5\*",
required=True,
)
@api.param(
"cursor",
description="CMR search results cursor",
example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]',
required=False,
)
def get(self):
# cmr_request_params = api.payload["cmr_request_params"]
cmr_request_params = request.args.get("cmr_request_params")
cmr_request_params = request.args["cmr_request_params"]
cursor = request.args.get("cursor")
# cursor = api.payload.get("cursor")
app.logger.info(
f"get_links received successfully: {cmr_request_params=} {cursor=}"
)

# TODO: remove hard-coded params. We may need to break these out into
# individual args instead of just passing a query string...encoding this
# in a way that the earthdata downloader can pass the requests along is
# difficult. Not sure how to achieve yet.
cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"

app.logger.info(f"get_links using {cursor=}")
links, cursor = get_links(
cmr_request_params=cmr_request_params,
Expand Down Expand Up @@ -326,7 +328,7 @@ def earthdata_token_exchange(authorization_code: Optional[str]) -> Dict[str, Any

@api.route("/api/earthdata/auth_callback")
class EarthdataAuthCallback(frx.Resource): # type: ignore[misc]
@api.response(*RESPONSE_CODES[302]) # type: ignore[misc]
@api.response(*RESPONSE_CODES[200]) # type: ignore[misc]
@api.response(*RESPONSE_CODES[500]) # type: ignore[misc]
def get(self) -> Response:
# Perform token exchange
Expand Down
4 changes: 3 additions & 1 deletion src/dat_backend/get_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@


def get_links(
*, cmr_request_params: str, search_after_cursor: str | None = None
*,
cmr_request_params: str,
search_after_cursor: str | None = None,
) -> tuple[list[str], str | None]:
request_url = CMR_GRANULES_URL + f"?{cmr_request_params}"

Expand Down
81 changes: 81 additions & 0 deletions test/unit/test_get_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from urllib.parse import quote

from dat_backend.app import app


app.testing = True


def _get_links_response(cursor: str | None = None):
    """Issue a GET to `/api/get-links` with a hard-coded set of CMR parameters.

    The hard-coded CMR parameters match 31 granules and specify a page size of
    5, so the first request never returns the entire result set. Callers page
    through the results by passing back the `cursor` from the previous
    response.

    Args:
        cursor: Cursor value taken from a previous get-links response. When
            omitted (or empty), no `cursor` query parameter is sent and the
            first page of results is requested.

    Returns:
        The test-client response for the request. The response is asserted to
        have a 200 status code before it is returned.
    """
    # This request results in 31 granule results.
    # TODO: the page size is currently driven by the `page_size` param given
    # in the cmr request params. Eventually we may want to extract this as a
    # separate query param that overrides whatever is given by the
    # cmr_request_params.
    cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"
    url_encoded_cmr_request_params = quote(cmr_request_params)

    cursor_query_param = ""
    if cursor:
        # The cursor contains quotes, brackets and commas, so it must be
        # URL-encoded just like the CMR parameters. Otherwise characters such
        # as `&`, `+` or `#` would silently change the query string.
        cursor_query_param = f"&cursor={quote(cursor, safe='')}"

    with app.test_client() as client:
        result = client.get(
            f"/api/get-links?cmr_request_params={url_encoded_cmr_request_params}{cursor_query_param}"
        )

    assert result.status_code == 200

    return result


def test_get_links():
    """Page through get-links results the way Earthdata Download does.

    This replicates the interaction that the Earthdata Downloader expects (see
    https://github.com/nasa/earthdata-download/blob/main/docs/GET_LINKS.md).

    Every request uses the same query parameters, which match 31 granules with
    a page size of 5. The first page is fetched without a cursor; each
    follow-up request passes the cursor returned by the previous response. The
    test checks that `done` is reported correctly, that no link is returned
    twice, and that the accumulated links add up to the expected total.
    """

    def _h5_links(response):
        # Only the `.h5` data files are counted; any other links are ignored.
        return [link for link in response.json["links"] if link.endswith(".h5")]

    response = _get_links_response()
    for expected_key in ("cursor", "links", "done"):
        assert expected_key in response.json

    # The first page must be full and must not claim to be the last one.
    data_links = _h5_links(response)
    assert len(data_links) == 5
    assert response.json["done"] is False

    # NOTE(review): 31 granules at 5 per page fill 7 pages, yet this loop makes
    # 7 follow-up requests (8 requests in total). Presumably the request after
    # the last granule is the one that reports `done` -- confirm against the
    # get-links implementation.
    for idx in range(1, 8):
        if response.json["done"]:
            # 31 granules, 5 granules per page.
            raise RuntimeError(f"Expected only 7 pages of results. Done after {idx}")

        response = _get_links_response(response.json["cursor"])
        data_links += _h5_links(response)

    # Once the loop is exhausted, the final response must be marked done.
    assert response.json["done"]
    # Each new page must have contributed links not seen before.
    assert len(set(data_links)) == len(data_links)

    # The query is expected to match 31 granules in total.
    assert len(data_links) == 31