Skip to content

Commit d9e336b

Browse files
Merge pull request #23 from DACCS-Climate/add-tests
Add tests
2 parents 7976f97 + e94645a commit d9e336b

File tree

14 files changed

+606
-112
lines changed

14 files changed

+606
-112
lines changed

.github/workflows/tests.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: Unit tests
2+
on:
3+
pull_request:
4+
types: [opened, synchronize, reopened, ready_for_review]
5+
jobs:
6+
test:
7+
if: github.event.pull_request.draft == false
8+
runs-on: ubuntu-latest
9+
steps:
10+
- uses: actions/checkout@v3
11+
- name: Set up python for testing
12+
uses: actions/setup-python@v4
13+
with:
14+
python-version: "3.x"
15+
cache: 'pip'
16+
- name: Install python test dependencies
17+
run: |
18+
pip install .[test]
19+
- name: Test with pytest
20+
run: |
21+
pytest ./tests/

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@ The first thing to do is to get a `client` object:
2121
All the information about the network can now be retrieved from the `client` object. E.g. the nodes available in the network can be accessed as:
2222
```python
2323
>>> client.nodes
24-
{'UofT': <marble_client.node.MarbleNode at 0x10c129990>,
25-
'PAVICS': <marble_client.node.MarbleNode at 0x10c6dd690>,
26-
'Hirondelle': <marble_client.node.MarbleNode at 0x10c6dd890>}
24+
{'UofTRedOak': <MarbleNode(id: 'UofTRedOak', name: 'Red Oak')>,
25+
'PAVICS': <MarbleNode(id: 'PAVICS', name: 'PAVICS')>,
26+
'Hirondelle': <MarbleNode(id: 'Hirondelle', name: 'Hirondelle')>}
2727
```
2828
The returned object is a Python `dict` with node IDs for keys and `MarbleNode` objects as values. A particular node can be accessed by its ID:
2929

3030
```python
31-
>>> mynode = client['UofT']
31+
>>> mynode = client['UofTRedOak']
3232
>>> type(mynode)
3333
marble_client.node.MarbleNode
3434
```
@@ -43,7 +43,7 @@ True
4343
The URL for the node can be retrieved as:
4444
```python
4545
>>> mynode.url
46-
'https://daccs.cs.toronto.edu'
46+
'https://redoak.cs.toronto.edu'
4747
```
4848

4949
Various other attributes of the node can be accessed as shown below (see [implementation](https://github.com/DACCS-Climate/marble_client_python/blob/main/marble_client/node.py) for the full list of available attributes).
@@ -121,7 +121,7 @@ Get the node your notebook/script is currently running on:
121121
```python
122122
>>> client = MarbleClient()
123123
>>> client.this_node
124-
<marble_client.node.MarbleNode at 0x10c129990>
124+
<MarbleNode(id: 'UofTRedOak', name: 'Red Oak')>
125125
```
126126

127127
Add session cookies to a `requests.Session` object. This means that any request made with that session variable will

marble_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .client import MarbleClient
2-
from .exceptions import MarbleBaseError, ServiceNotAvailableError, UnknownNodeError
2+
from .exceptions import MarbleBaseError, ServiceNotAvailableError, UnknownNodeError, JupyterEnvironmentError
33
from .node import MarbleNode
44
from .services import MarbleService

marble_client/client.py

Lines changed: 88 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
1-
import contextlib
21
import datetime
32
import json
43
import os
54
import shutil
65
import warnings
76
from functools import wraps, cache
8-
from typing import Optional
7+
from typing import Optional, Any
98
from urllib.parse import urlparse
109

1110
import dateutil.parser
1211
import requests
1312

14-
from marble_client.constants import CACHE_FNAME, CACHE_META_FNAME, NODE_REGISTRY_URL
13+
from marble_client.constants import CACHE_FNAME, NODE_REGISTRY_URL
1514
from marble_client.exceptions import UnknownNodeError, JupyterEnvironmentError
1615
from marble_client.node import MarbleNode
1716

@@ -25,45 +24,43 @@ def check_jupyterlab(f):
2524
2625
This is used as a pre-check for functions that only work in a Marble Jupyterlab
2726
environment.
27+
28+
Note that this checks if either the BIRDHOUSE_HOST_URL or PAVICS_HOST_URL are present to support
29+
versions of birdhouse-deploy prior to 2.4.0.
2830
"""
2931
@wraps(f)
3032
def wrapper(*args, **kwargs):
31-
if os.getenv("PAVICS_HOST_URL"):
33+
birdhouse_host_var = ("PAVICS_HOST_URL", "BIRDHOUSE_HOST_URL")
34+
jupyterhub_env_vars = ("JUPYTERHUB_API_URL", "JUPYTERHUB_USER", "JUPYTERHUB_API_TOKEN")
35+
if any(os.getenv(var) for var in birdhouse_host_var) and all(os.getenv(var) for var in jupyterhub_env_vars):
3236
return f(*args, **kwargs)
3337
raise JupyterEnvironmentError("Not in a Marble jupyterlab environment")
3438
return wrapper
3539

3640

3741
class MarbleClient:
38-
def __init__(self, fallback: Optional[bool] = True) -> None:
42+
_registry_cache_key = "marble_client_python:cached_registry"
43+
_registry_cache_last_updated_key = "marble_client_python:last_updated"
44+
45+
def __init__(self, fallback: bool = True) -> None:
3946
"""Constructor method
4047
4148
:param fallback: If True, then fall back to a cached version of the registry
4249
if the cloud registry cannot be accessed, defaults to True
43-
:type fallback: Optional[bool], optional
50+
:type fallback: bool
4451
:raises requests.exceptions.RequestException: Raised when there is an issue
4552
connecting to the cloud registry and `fallback` is False
4653
:raises UserWarning: Raised when there is an issue connecting to the cloud registry
4754
and `fallback` is True
4855
:raise RuntimeError: If cached registry needs to be read but there is no cache
4956
"""
50-
self._fallback = fallback
5157
self._nodes: dict[str, MarbleNode] = {}
52-
self._registry: dict = {}
53-
try:
54-
registry = requests.get(NODE_REGISTRY_URL)
55-
registry.raise_for_status()
56-
except (requests.exceptions.RequestException, requests.exceptions.ConnectionError):
57-
if self._fallback:
58-
warnings.warn("Cannot retrieve cloud registry. Falling back to cached version")
59-
self._load_registry_from_cache()
60-
else:
61-
raise
62-
else:
63-
self._load_registry_from_cloud(registry)
58+
self._registry_uri: str
59+
self._registry: dict
60+
self._registry_uri, self._registry = self._load_registry(fallback)
6461

65-
for node, node_details in self._registry.items():
66-
self._nodes[node] = MarbleNode(node, node_details)
62+
for node_id, node_details in self._registry.items():
63+
self._nodes[node_id] = MarbleNode(node_id, node_details)
6764

6865
@property
6966
def nodes(self) -> dict[str, MarbleNode]:
@@ -78,11 +75,13 @@ def this_node(self) -> MarbleNode:
7875
7976
Note that this function only works in a Marble Jupyterlab environment.
8077
"""
81-
host_url = urlparse(os.getenv("PAVICS_HOST_URL"))
78+
# PAVICS_HOST_URL is the deprecated variable used in older versions (<2.4.0) of birdhouse-deploy
79+
url_string = os.getenv("BIRDHOUSE_HOST_URL", os.getenv("PAVICS_HOST_URL"))
80+
host_url = urlparse(url_string)
8281
for node in self.nodes.values():
8382
if urlparse(node.url).hostname == host_url.hostname:
8483
return node
85-
raise UnknownNodeError(f"No node found in the registry with the url {host_url}")
84+
raise UnknownNodeError(f"No node found in the registry with the url '{url_string}'")
8685

8786
@check_jupyterlab
8887
def this_session(self, session: Optional[requests.Session] = None) -> requests.Session:
@@ -97,78 +96,94 @@ def this_session(self, session: Optional[requests.Session] = None) -> requests.S
9796
session = requests.Session()
9897
r = requests.get(f"{os.getenv('JUPYTERHUB_API_URL')}/users/{os.getenv('JUPYTERHUB_USER')}",
9998
headers={"Authorization": f"token {os.getenv('JUPYTERHUB_API_TOKEN')}"})
99+
try:
100+
r.raise_for_status()
101+
except requests.HTTPError as err:
102+
raise JupyterEnvironmentError("Cannot retrieve login cookies through the JupyterHub API.") from err
100103
for name, value in r.json().get("auth_state", {}).get("magpie_cookies", {}).items():
101104
session.cookies.set(name, value)
102105
return session
103106

107+
@property
108+
def registry_uri(self):
109+
return self._registry_uri
110+
104111
def __getitem__(self, node: str) -> MarbleNode:
105112
try:
106113
return self.nodes[node]
107-
except KeyError:
108-
raise UnknownNodeError(f"No node named '{node}' in the Marble network.") from None
114+
except KeyError as err:
115+
raise UnknownNodeError(f"No node named '{node}' in the Marble network.") from err
109116

110-
def _load_registry_from_cloud(self, registry_response: requests.Response) -> None:
111-
try:
112-
self._registry = registry_response.json()
113-
except json.JSONDecodeError:
114-
raise RuntimeError(
115-
"Could not parse JSON returned from the cloud registry. "
116-
f"Consider re-trying with 'fallback' set to True when instantiating the {self.__class__.__name__} "
117-
"object."
118-
)
119-
self._save_registry_as_cache()
120-
121-
def _load_registry_from_cache(self):
122-
try:
123-
with open(CACHE_FNAME, "r") as f:
124-
self._registry = json.load(f)
125-
except FileNotFoundError:
126-
raise RuntimeError(f"Local registry cache not found. No file named {CACHE_FNAME}.") from None
117+
def __contains__(self, node: str) -> bool:
118+
"""Check if a node is available
119+
120+
:param node: ID of the Marble node
121+
:type node: str
122+
:return: True if the node is present in the registry, False otherwise
123+
:rtype: bool
124+
"""
125+
return node in self.nodes
127126

127+
def _load_registry(self, fallback: bool = True) -> tuple[str, dict[str, Any]]:
128128
try:
129-
with open(CACHE_META_FNAME, "r") as f:
130-
data = json.load(f)
131-
date = dateutil.parser.isoparse(data["last_cache_date"])
132-
except (FileNotFoundError, ValueError):
133-
date = "Unknown"
129+
registry_response = requests.get(NODE_REGISTRY_URL)
130+
registry_response.raise_for_status()
131+
registry = registry_response.json()
132+
except (requests.exceptions.RequestException, requests.exceptions.ConnectionError) as err:
133+
error = err
134+
error_msg = f"Cannot retrieve registry from {NODE_REGISTRY_URL}."
135+
except json.JSONDecodeError as err:
136+
error = err
137+
error_msg = f"Could not parse JSON returned from the registry at {NODE_REGISTRY_URL}"
138+
else:
139+
self._save_registry_as_cache(registry)
140+
return NODE_REGISTRY_URL, registry
134141

135-
print(f"Registry loaded from cache dating: {date}")
142+
if fallback:
143+
warnings.warn(f"{error_msg} Falling back to cached version")
144+
return f"file://{os.path.realpath(CACHE_FNAME)}", self._load_registry_from_cache()
145+
else:
146+
raise RuntimeError(error_msg) from error
136147

137-
def _save_registry_as_cache(self):
148+
def _load_registry_from_cache(self) -> dict[str, Any]:
149+
try:
150+
with open(CACHE_FNAME) as f:
151+
cached_registry = json.load(f)
152+
except FileNotFoundError as err:
153+
raise RuntimeError(f"Local registry cache not found. No file named {CACHE_FNAME}.") from err
154+
except json.JSONDecodeError as err:
155+
raise RuntimeError(f"Could not parse JSON returned from the cached registry at {CACHE_FNAME}") from err
156+
else:
157+
if self._registry_cache_key in cached_registry:
158+
registry = cached_registry[self._registry_cache_key]
159+
date = dateutil.parser.isoparse(cached_registry[self._registry_cache_last_updated_key])
160+
else:
161+
# registry is cached in old format, re-cache it in the newer format
162+
registry = cached_registry
163+
self._save_registry_as_cache(registry)
164+
date = "Unknown"
165+
print(f"Registry loaded from cache dating: {date}")
166+
return registry
167+
168+
def _save_registry_as_cache(self, registry: dict[str, Any]) -> None:
138169
cache_backup = CACHE_FNAME + ".backup"
139-
cache_meta_backup = CACHE_META_FNAME + ".backup"
140170

141171
# Create cache parent directories if they don't exist
142-
for cache_dir in {os.path.dirname(CACHE_FNAME), os.path.dirname(CACHE_META_FNAME)}:
143-
os.makedirs(cache_dir, exist_ok=True)
144-
145-
# Suppressing a FileNotFoundError error for the first use case where a cached registry file
146-
# does not exist
147-
with contextlib.suppress(FileNotFoundError):
172+
os.makedirs(os.path.dirname(CACHE_FNAME), exist_ok=True)
173+
if os.path.isfile(CACHE_FNAME):
148174
shutil.copy(CACHE_FNAME, cache_backup)
149-
shutil.copy(CACHE_META_FNAME, cache_meta_backup)
150175

151176
try:
152-
metadata = {"last_cache_date": datetime.datetime.now(tz=datetime.timezone.utc).isoformat()}
153-
154-
# Write the metadata
155-
with open(CACHE_META_FNAME, "w") as f:
156-
json.dump(metadata, f)
157-
158-
# write the registry
159177
with open(CACHE_FNAME, "w") as f:
160-
json.dump(self._registry, f)
161-
162-
except OSError as e:
163-
# If either the cache file or the metadata file could not be written, then restore from backup files
178+
data = {self._registry_cache_key: registry,
179+
self._registry_cache_last_updated_key: datetime.datetime.now(tz=datetime.timezone.utc).isoformat()}
180+
json.dump(data, f)
181+
except OSError:
182+
# If the cache file cannot be written, then restore from backup files
164183
shutil.copy(cache_backup, CACHE_FNAME)
165-
shutil.copy(cache_meta_backup, CACHE_META_FNAME)
166-
167184
finally:
168-
# Similarly, suppressing an error that I don't need to catch
169-
with contextlib.suppress(FileNotFoundError):
185+
if os.path.isfile(cache_backup):
170186
os.remove(cache_backup)
171-
os.remove(cache_meta_backup)
172187

173188

174189
if __name__ == "__main__":

marble_client/constants.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from platformdirs import user_cache_dir
44

5-
__all__ = ("NODE_REGISTRY_URL", "CACHE_FNAME", "CACHE_META_FNAME")
5+
__all__ = ("NODE_REGISTRY_URL", "CACHE_FNAME")
66

77
# Marble node registry URL
88
NODE_REGISTRY_URL: str = os.getenv(
@@ -14,6 +14,3 @@
1414

1515
# location to write registry cache
1616
CACHE_FNAME: str = os.path.join(_CACHE_DIR, "registry.cached.json")
17-
18-
# location to write metadata about the registry cache
19-
CACHE_META_FNAME: str = os.path.join(_CACHE_DIR, "cache_metadata.json")

0 commit comments

Comments
 (0)