Skip to content

Commit c7fb62f

Browse files
Merge pull request #6 from nsidc/da-98-get-links-cmr-params
DA-98 get links cmr params
2 parents b52cd7b + 251f45a commit c7fb62f

File tree

4 files changed

+118
-38
lines changed

4 files changed

+118
-38
lines changed

README.md

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,11 @@ scripts/run_tests.sh
6767

6868
An example deep-link to initiate EDD downloads:
6969

70-
In dev:
70+
In integration (note: for the get-links query param, the `cmr_request_params`
71+
were URL-encoded first, and then the entire get-links URL was URL-encoded again):
7172

7273
```
73-
earthdata-download://startDownload?getLinks=https://dev.hermes.trst2284.dev.int.nsidc.org/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://dev.hermes.trst2284.dev.int.nsidc.org/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
74-
```
75-
76-
In integration:
77-
78-
```
79-
earthdata-download://startDownload?getLinks=https://integration.nsidc.org/apps/data-access-tool/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
74+
earthdata-download://startDownload?getLinks=https%3A//integration.nsidc.org/apps/data-access-tool/api/get-links%3Fcmr_request_params%3Dprovider%253DNSIDC_CPRD%2526page_size%253D5%2526sort_key%255B%255D%253D-start_date%2526sort_key%255B%255D%253Dproducer_granule_id%2526short_name%253DATL06%2526version%253D6%2526version%253D06%2526version%253D006%2526temporal%255B%255D%253D2018-10-14T00%253A00%253A00Z%252C2025-02-25T00%253A25%253A20Z%2526bounding_box%253D-180%252C-90%252C180%252C90%2526options%255Bproducer_granule_id%255D%255Bpattern%255D%253Dtrue%2526producer_granule_id%255B%255D%253D%252AATL06_2024%252A_0804%252A_006_01.h5%252A&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
8075
```
8176

8277
A button needs to be added to the Data Access Tool that will issue a GET request

src/dat_backend/app.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -182,45 +182,47 @@ def post(self) -> Any:
182182
return response
183183

184184

185-
# TODO: re-add this and make use of it. This gives the swagger interface more
186-
# information and helps to document the API.
187-
# GET_LINKS_DOC: Final[frx.model.Model] = api.model(
188-
# "get_links",
189-
# {
190-
# "cmr_request_params": frx.fields.String(
191-
# description="CMR Request parameters as a string",
192-
# example="provider=NSIDC_ECS&page_size=2000&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-19T20:51:37Z&bounding_box=-101.94,57.71,-90.21,61.13",
193-
# required=True,
194-
# ),
195-
# "cursor": frx.fields.String(
196-
# description="CMR search results cursor",
197-
# example='1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203',
198-
# required=False,
199-
# ),
200-
# },
201-
# )
185+
get_links_response_model = api.model(
186+
"GetLinksResponse",
187+
{
188+
"links": frx.fields.List(
189+
frx.fields.String(
190+
example="https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/006/2024/02/09/ATL06_20240209110944_08042201_006_01.h5"
191+
)
192+
),
193+
"done": frx.fields.Boolean(example=False),
194+
"cursor": frx.fields.String(
195+
example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]'
196+
),
197+
},
198+
)
202199

203200

204201
@api.route("/api/get-links")
205202
class GetLinks(frx.Resource): # type: ignore[misc]
206203

207-
@api.response(200, "Success")
208-
# @api.expect(GET_LINKS_DOC) # type: ignore
204+
@api.marshal_with(get_links_response_model, mask=False)
205+
@api.response(*RESPONSE_CODES[200])
206+
@api.response(*RESPONSE_CODES[500])
207+
@api.param(
208+
"cmr_request_params",
209+
description="CMR Request parameters as a string",
210+
example=r"provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=\*ATL06_2024\*_0804\*_006_01.h5\*",
211+
required=True,
212+
)
213+
@api.param(
214+
"cursor",
215+
description="CMR search results cursor",
216+
example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]',
217+
required=False,
218+
)
209219
def get(self):
210-
# cmr_request_params = api.payload["cmr_request_params"]
211-
cmr_request_params = request.args.get("cmr_request_params")
220+
cmr_request_params = request.args["cmr_request_params"]
212221
cursor = request.args.get("cursor")
213-
# cursor = api.payload.get("cursor")
214222
app.logger.info(
215223
f"get_links received successfully: {cmr_request_params=} {cursor=}"
216224
)
217225

218-
# TODO: remove hard-coded params. We may need to break these out into
219-
# individual args instead of just passing a query string...encoding this
220-
# in a way that the earthdata downloader can pass the requests along is
221-
# difficult. Not sure how to achieve yet.
222-
cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"
223-
224226
app.logger.info(f"get_links using {cursor=}")
225227
links, cursor = get_links(
226228
cmr_request_params=cmr_request_params,
@@ -326,7 +328,7 @@ def earthdata_token_exchange(authorization_code: Optional[str]) -> Dict[str, Any
326328

327329
@api.route("/api/earthdata/auth_callback")
328330
class EarthdataAuthCallback(frx.Resource): # type: ignore[misc]
329-
@api.response(*RESPONSE_CODES[302]) # type: ignore[misc]
331+
@api.response(*RESPONSE_CODES[200]) # type: ignore[misc]
330332
@api.response(*RESPONSE_CODES[500]) # type: ignore[misc]
331333
def get(self) -> Response:
332334
# Perform token exchange

src/dat_backend/get_links.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77

88
def get_links(
9-
*, cmr_request_params: str, search_after_cursor: str | None = None
9+
*,
10+
cmr_request_params: str,
11+
search_after_cursor: str | None = None,
1012
) -> tuple[list[str], str | None]:
1113
request_url = CMR_GRANULES_URL + f"?{cmr_request_params}"
1214

test/unit/test_get_links.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from urllib.parse import quote
2+
3+
from dat_backend.app import app
4+
5+
6+
app.testing = True
7+
8+
9+
def _get_links_response(cursor: str | None = None):
10+
"""Use a hard-coded set of CMR parameters to drive get-links.
11+
12+
Optionally accepts a cursor for paging through results.
13+
14+
The hard-coded CMR parameters match 31 granules. The parameters also specify
15+
a page size of 5, ensuring that the first request will not return the entire
16+
result set. The `test_get_links` test below passes the `cursor` after the first
17+
request in order to test the typical interaction of Earthdata Download. See
18+
the docstring of that function for more info.
19+
"""
20+
with app.test_client() as client:
21+
# This request results in 31 granule results.
22+
# TODO: this is currently driven by the `page_size` param given in the
23+
# cmr request params. Eventually we may want to extract this as a
24+
# separate query param that overrides whatever is given by the
25+
# `cmr_request_params`.
26+
cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"
27+
url_encoded_cmr_request_params = quote(cmr_request_params)
28+
cursor_query_param = ""
29+
if cursor:
30+
cursor_query_param = f"&cursor={cursor}"
31+
result = client.get(
32+
f"/api/get-links?cmr_request_params={url_encoded_cmr_request_params}{cursor_query_param}"
33+
)
34+
35+
assert result.status_code == 200
36+
37+
return result
38+
39+
40+
def test_get_links():
41+
"""Test the get-links endpoint.
42+
43+
This test seeks to replicate the behavior that the Earthdata Downloader
44+
expects (see
45+
https://github.com/nasa/earthdata-download/blob/main/docs/GET_LINKS.md).
46+
47+
The test makes a `get-links` request utilizing the same query parameters
48+
that match 31 granules with a page size of 5.
49+
50+
The test gets the first set of results, then requests the next with the
51+
cursor provided by the previous request, and does so until the result set is
52+
exhausted. We assert that the `done` status is correctly set, and that the
53+
total list of links returned by `get-links` matches what we expect.
54+
"""
55+
result = _get_links_response()
56+
assert "cursor" in result.json
57+
assert "links" in result.json
58+
assert "done" in result.json
59+
60+
# Assert that we have the expected number of data links
61+
data_links = [link for link in result.json["links"] if link.endswith(".h5")]
62+
assert len(data_links) == 5
63+
assert result.json["done"] is False
64+
65+
for idx in range(1, 7 + 1):
66+
if result.json["done"]:
67+
# 31 granules, 5 granules per page.
68+
raise RuntimeError(f"Expected only 7 pages of results. Done after {idx}")
69+
70+
result = _get_links_response(result.json["cursor"])
71+
data_links.extend(
72+
[link for link in result.json["links"] if link.endswith(".h5")]
73+
)
74+
75+
# The result should be done.
76+
assert result.json["done"]
77+
# Ensure that each new page provides a new set of links.
78+
assert len(data_links) == len(set(data_links))
79+
80+
# We expect a total of 31 granule results for the query.
81+
assert len(data_links) == 31

0 commit comments

Comments
 (0)