nsidc · trey-stafford · Mar 14, 2025 · Mar 12, 2025 · Mar 12, 2025 · Mar 12, 2025
diff --git a/README.md b/README.md
@@ -67,16 +67,11 @@ scripts/run_tests.sh
 
 An example deep-link to initiate EDD downloads:
 
-In dev:
+In integration (note: the `cmr_request_params` value was url-encoded first, and
+then the entire `getLinks` URL was url-encoded again, so it is double-encoded):
 
 ```
-earthdata-download://startDownload?getLinks=https://dev.hermes.trst2284.dev.int.nsidc.org/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://dev.hermes.trst2284.dev.int.nsidc.org/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
-```
-
-In integration:
-
-```
-earthdata-download://startDownload?getLinks=https://integration.nsidc.org/apps/data-access-tool/api/get-links?cmr_request_params=foo&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
+earthdata-download://startDownload?getLinks=https%3A//integration.nsidc.org/apps/data-access-tool/api/get-links%3Fcmr_request_params%3Dprovider%253DNSIDC_CPRD%2526page_size%253D5%2526sort_key%255B%255D%253D-start_date%2526sort_key%255B%255D%253Dproducer_granule_id%2526short_name%253DATL06%2526version%253D6%2526version%253D06%2526version%253D006%2526temporal%255B%255D%253D2018-10-14T00%253A00%253A00Z%252C2025-02-25T00%253A25%253A20Z%2526bounding_box%253D-180%252C-90%252C180%252C90%2526options%255Bproducer_granule_id%255D%255Bpattern%255D%253Dtrue%2526producer_granule_id%255B%255D%253D%252AATL06_2024%252A_0804%252A_006_01.h5%252A&downloadId=atl06_06&clientId=data_access_tool&authUrl=https://integration.nsidc.org/apps/data-access-tool/api/earthdata/auth?eddRedirect=earthdata-download%3A%2F%2FauthCallback
 ```
 
 A button needs to be added to the Data Access Tool that will issue a GET request

diff --git a/src/dat_backend/app.py b/src/dat_backend/app.py
@@ -182,45 +182,47 @@ def post(self) -> Any:
         return response
 
 
-# TODO: re-add this and make use of it. This gives the swagger interface more
-# information and helps to document the API.
-# GET_LINKS_DOC: Final[frx.model.Model] = api.model(
-#     "get_links",
-#     {
-#         "cmr_request_params": frx.fields.String(
-#             description="CMR Request parameters as a string",
-#             example="provider=NSIDC_ECS&page_size=2000&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-19T20:51:37Z&bounding_box=-101.94,57.71,-90.21,61.13",
-#             required=True,
-#         ),
-#         "cursor": frx.fields.String(
-#             description="CMR search results cursor",
-#             example='1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203',
-#             required=False,
-#         ),
-#     },
-# )
+get_links_response_model = api.model(  # response schema used by marshal_with on GetLinks.get
+    "GetLinksResponse",
+    {
+        "links": frx.fields.List(
+            frx.fields.String(
+                example="https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/006/2024/02/09/ATL06_20240209110944_08042201_006_01.h5"
+            )
+        ),
+        "done": frx.fields.Boolean(example=False),
+        "cursor": frx.fields.String(
+            example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]'
+        ),
+    },
+)
 
 
 @api.route("/api/get-links")
 class GetLinks(frx.Resource):  # type: ignore[misc]
 
-    @api.response(200, "Success")
-    # @api.expect(GET_LINKS_DOC)  # type: ignore
+    @api.marshal_with(get_links_response_model, mask=False)
+    @api.response(*RESPONSE_CODES[200])
+    @api.response(*RESPONSE_CODES[500])
+    @api.param(
+        "cmr_request_params",
+        description="CMR Request parameters as a string",
+        example=r"provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=\*ATL06_2024\*_0804\*_006_01.h5\*",
+        required=True,
+    )
+    @api.param(
+        "cursor",
+        description="CMR search results cursor",
+        example='[1638327816913,"atl06_20211201030329_10641303_006_01.h5",2706594203]',
+        required=False,
+    )
     def get(self):
-        # cmr_request_params = api.payload["cmr_request_params"]
-        cmr_request_params = request.args.get("cmr_request_params")
+        cmr_request_params = request.args["cmr_request_params"]
         cursor = request.args.get("cursor")
-        # cursor = api.payload.get("cursor")
         app.logger.info(
             f"get_links received successfully: {cmr_request_params=} {cursor=}"
         )
 
-        # TODO: remove hard-coded params. We may need to break these out into
-        # individual args instead of just passing a query string...encoding this
-        # in a way that the earthdata downloader can pass the requests along is
-        # difficult. Not sure how to achieve yet.
-        cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"
-
         app.logger.info(f"get_links using {cursor=}")
         links, cursor = get_links(
             cmr_request_params=cmr_request_params,
@@ -326,7 +328,7 @@ def earthdata_token_exchange(authorization_code: Optional[str]) -> Dict[str, Any
 
 @api.route("/api/earthdata/auth_callback")
 class EarthdataAuthCallback(frx.Resource):  # type: ignore[misc]
-    @api.response(*RESPONSE_CODES[302])  # type: ignore[misc]
+    @api.response(*RESPONSE_CODES[200])  # type: ignore[misc]
     @api.response(*RESPONSE_CODES[500])  # type: ignore[misc]
     def get(self) -> Response:
         # Perform token exchange

diff --git a/src/dat_backend/get_links.py b/src/dat_backend/get_links.py
@@ -6,7 +6,9 @@
 
 
 def get_links(
-    *, cmr_request_params: str, search_after_cursor: str | None = None
+    *,
+    cmr_request_params: str,
+    search_after_cursor: str | None = None,
 ) -> tuple[list[str], str | None]:
     request_url = CMR_GRANULES_URL + f"?{cmr_request_params}"
 

diff --git a/test/unit/test_get_links.py b/test/unit/test_get_links.py
@@ -0,0 +1,81 @@
+from urllib.parse import quote
+
+from dat_backend.app import app
+
+
+app.testing = True
+
+
+def _get_links_response(cursor: str | None = None):
+    """Use a hard-coded set of CMR parameters to drive get-links.
+
+    Optionally accepts a cursor (URL-encoded here) for paging through results.
+
+    The hard-coded CMR parameters match 31 granules. The parameters also specify
+    a page size of 5, ensuring that the first request will not return the entire
+    result set. The `test_get_links` test below passes the `cursor` after the first
+    request in order to test the typical interaction of Earthdata Download.
+    Returns the test-client response after asserting a 200 status code.
+    """
+    with app.test_client() as client:
+        # This request results in 31 granule results.
+        # TODO: this is currently driven by the `page_size` param given in the
+        # cmr request params. Eventually we may want to extract this as a
+        # separate query param that overrides whatever is given by the
+        # `cmr_request_params`.
+        cmr_request_params = "provider=NSIDC_CPRD&page_size=5&sort_key[]=-start_date&sort_key[]=producer_granule_id&short_name=ATL06&version=6&version=06&version=006&temporal[]=2018-10-14T00:00:00Z,2025-02-25T00:25:20Z&bounding_box=-180,-90,180,90&options[producer_granule_id][pattern]=true&producer_granule_id[]=*ATL06_2024*_0804*_006_01.h5*"
+        url_encoded_cmr_request_params = quote(cmr_request_params)
+        # The cursor can contain characters such as quotes, brackets, and commas,
+        # so URL-encode it just like `cmr_request_params` before building the URL.
+        cursor_query_param = f"&cursor={quote(cursor, safe='')}" if cursor else ""
+        result = client.get(
+            f"/api/get-links?cmr_request_params={url_encoded_cmr_request_params}{cursor_query_param}"
+        )
+
+        assert result.status_code == 200
+
+    return result
+
+
+def test_get_links():
+    """Test the get-links endpoint.
+
+    This test seeks to replicate the behavior that the Earthdata Downloader
+    expects (see
+    https://github.com/nasa/earthdata-download/blob/main/docs/GET_LINKS.md).
+
+    The test makes a `get-links` request utilizing the same query parameters
+    that match 31 granules with a page size of 5.
+
+    The test gets the first set of results, then requests the next with the
+    cursor provided by the previous request, and does so until the result set is
+    exhausted. We assert that the `done` status is correctly set, and that the
+    total list of links returned by `get-links` matches what we expect.
+    """
+    result = _get_links_response()
+    assert "cursor" in result.json
+    assert "links" in result.json
+    assert "done" in result.json
+
+    # Assert that we have the expected number of data links
+    data_links = [link for link in result.json["links"] if link.endswith(".h5")]
+    assert len(data_links) == 5
+    assert result.json["done"] is False
+
+    for idx in range(1, 7 + 1):
+        if result.json["done"]:
+            # 31 granules, 5 granules per page.
+            raise RuntimeError(f"Expected only 7 pages of results. Done after {idx}")
+
+        result = _get_links_response(result.json["cursor"])
+        data_links.extend(
+            [link for link in result.json["links"] if link.endswith(".h5")]
+        )
+
+    # The result should be done.
+    assert result.json["done"]
+    # Ensure that each new page provides a new set of links.
+    assert len(data_links) == len(set(data_links))
+
+    # We expect a total of 31 granule results for the query.
+    assert len(data_links) == 31