Skip to content

Commit e187ae0

Browse files
authored
Merge pull request #443 from nsidc/auth-439
fixing searching for restricted datasets and accessing ASF on demand data from Opera
2 parents 5805b14 + d20f713 commit e187ae0

File tree

7 files changed

+1078
-966
lines changed

7 files changed

+1078
-966
lines changed

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## [Unreleased]
4+
5+
* Bug fixes:
6+
* fixed #439 by adding more trusted domains to `SessionWithHeaderRedirection`
7+
* fixed #438 by using an authenticated session for hits()
8+
* Enhancements:
9+
* addressing #427 by adding `instrument` and `project` parameters to the collection query
10+
311
## [v0.8.2] 2023-12-06
412
* Bug fixes:
513
* Enable AWS check with IMDSv2
@@ -167,7 +175,7 @@
167175
- Add basic classes to interact with NASA CMR, EDL and cloud access.
168176
- Basic object formatting.
169177

170-
[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.5.2...HEAD
178+
[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.8.2...HEAD
171179
[v0.5.2]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.2
172180
[v0.5.1]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.1
173181
[v0.5.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.0

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ With *earthaccess* we can login, search and download data with a few lines of co
6565

6666
The only requirement to use this library is to open a free account with NASA [EDL](https://urs.earthdata.nasa.gov).
6767

68-
<a href="https://urs.earthdata.nasa.gov"><img src="https://auth.ops.maap-project.org/cas/images/urs-logo.png" /></a>
69-
7068

7169
### **Authentication**
7270

binder/environment-dev.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
dependencies:
55
# This environment bootstraps poetry, the actual dev environment
66
# is installed and managed with poetry
7-
- python=3.9
7+
- python=3.10
88
- jupyterlab=3
99
- xarray>=0.19
1010
- ipyleaflet>=0.13

earthaccess/auth.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import getpass
2+
import importlib.metadata
23
import logging
34
import os
45
from netrc import NetrcParseError
@@ -11,22 +12,35 @@
1112

1213
from .daac import DAACS
1314

15+
try:
16+
user_agent = f"earthaccess v{importlib.metadata.version('earthaccess')}"
17+
except importlib.metadata.PackageNotFoundError:
18+
user_agent = "earthaccess"
19+
20+
1421
logger = logging.getLogger(__name__)
1522

1623

1724
class SessionWithHeaderRedirection(requests.Session):
1825
"""
1926
Requests removes auth headers if the redirect happens outside the
2027
original req domain.
21-
This is taken from https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
2228
"""
2329

24-
AUTH_HOST = "urs.earthdata.nasa.gov"
30+
AUTH_HOSTS: List[str] = [
31+
"urs.earthdata.nasa.gov",
32+
"cumulus.asf.alaska.edu",
33+
"sentinel1.asf.alaska.edu",
34+
"nisar.asf.alaska.edu",
35+
"datapool.asf.alaska.edu",
36+
]
2537

2638
def __init__(
2739
self, username: Optional[str] = None, password: Optional[str] = None
2840
) -> None:
2941
super().__init__()
42+
self.headers.update({"User-Agent": user_agent})
43+
3044
if username and password:
3145
self.auth = (username, password)
3246

@@ -39,11 +53,13 @@ def rebuild_auth(self, prepared_request: Any, response: Any) -> None:
3953
if "Authorization" in headers:
4054
original_parsed = urlparse(response.request.url)
4155
redirect_parsed = urlparse(url)
42-
if (
43-
(original_parsed.hostname != redirect_parsed.hostname)
44-
and redirect_parsed.hostname != self.AUTH_HOST
45-
and original_parsed.hostname != self.AUTH_HOST
56+
if (original_parsed.hostname != redirect_parsed.hostname) and (
57+
redirect_parsed.hostname not in self.AUTH_HOSTS
58+
or original_parsed.hostname not in self.AUTH_HOSTS
4659
):
60+
logger.debug(
61+
f"Deleting Auth Headers: {original_parsed.hostname} -> {redirect_parsed.hostname}"
62+
)
4763
del headers["Authorization"]
4864
return
4965

@@ -208,7 +224,7 @@ def get_session(self, bearer_token: bool = True) -> requests.Session:
208224
Returns:
209225
class Session instance with Auth and bearer token headers
210226
"""
211-
session = requests.Session()
227+
session = SessionWithHeaderRedirection()
212228
if bearer_token and self.authenticated:
213229
# This will avoid the use of the netrc after we are logged in
214230
session.trust_env = False

earthaccess/search.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,16 @@ def hits(self) -> int:
6060
Returns:
6161
The number of results reported by CMR.
6262
"""
63-
return super().hits()
63+
url = self._build_url()
64+
65+
response = self.session.get(url, headers=self.headers, params={"page_size": 0})
66+
67+
try:
68+
response.raise_for_status()
69+
except exceptions.HTTPError as ex:
70+
raise RuntimeError(ex.response.text)
71+
72+
return int(response.headers["CMR-Hits"])
6473

6574
def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]:
6675
"""Filter by concept ID.
@@ -107,6 +116,39 @@ def doi(self, doi: str) -> Type[CollectionQuery]:
107116
self.params["doi"] = doi
108117
return self
109118

119+
def instrument(self, instrument: str) -> Type[CollectionQuery]:
120+
"""Search datasets by instrument
121+
122+
???+ Tip
123+
Not all datasets have an associated instrument. This works
124+
only at the dataset level but not the granule (data) level.
125+
126+
Parameters:
127+
instrument (String): instrument of a dataset, e.g. instrument=GEDI
128+
"""
129+
if not isinstance(instrument, str):
130+
raise TypeError("instrument must be of type str")
131+
132+
self.params["instrument"] = instrument
133+
return self
134+
135+
def project(self, project: str) -> Type[CollectionQuery]:
136+
"""Search datasets by associated project
137+
138+
???+ Tip
139+
Not all datasets have an associated project. This works
140+
only at the dataset level but not the granule (data) level.
141+
Will return datasets across DAACs matching the project.
142+
143+
Parameters:
144+
project (String): associated project of a dataset, e.g. project=EMIT
145+
"""
146+
if not isinstance(project, str):
147+
raise TypeError("project must be of type str")
148+
149+
self.params["project"] = project
150+
return self
151+
110152
def parameters(self, **kwargs: Any) -> Type[CollectionQuery]:
111153
"""Provide query parameters as keyword arguments. The keyword needs to match the name
112154
of the method, and the value should either be the value or a tuple of values.

0 commit comments

Comments
 (0)