Skip to content

Commit 6fd63ab

Browse files
committed
Add example acquisition config file.
1 parent 6a4cce3 commit 6fd63ab

File tree

4 files changed

+170
-41
lines changed

4 files changed

+170
-41
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,7 @@ __marimo__/
217217

218218
# Project-specific files
219219
output/
220+
221+
# Miscellaneous exclusions
222+
*.bak
223+
acquisition.yaml

acquisition.example.yaml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
---
2+
# acquisition.yaml
3+
#
4+
# Optionally explain why you've identified these specific gene symbols as suitable
5+
# candidates for further analysis.
6+
#
7+
# Target Gene Symbols
8+
# -------------------
9+
#
10+
# The `target_gene_symbols` field must comprise an array of valid gene symbols,
11+
# as in the example provided below, which targets a subset of genes associated with
12+
# heritable connective tissue diseases.
13+
#
14+
target_gene_symbols:
15+
- COL5A1
16+
- COL5A2
17+
- COL1A1
18+
- COL3A1
19+
- PLOD1
20+
- FKBP14
21+
- COL1A1
22+
- COL1A2
23+
- ADAMTS2
24+
- ZNF469
25+
- PRDM5
26+
- TNXB
27+
- COL1A2
28+
- CHST14
29+
- DSE
30+
- COL12A1
31+
- C1R
32+
- C1S
33+
- B4GALT7
34+
- B3GALT6
35+
- SLC39A13
36+
- COL1A3
37+
- AEBP1
38+
- COL6A1
39+
- COL6A2
40+
- COL6A3
41+
- PLOD2
42+
- PLOD3
43+
- P4HB
44+
- LEPRE1
45+
- CRTAP
46+
- PPIB
47+
- SERPINH1
48+
- P3H1
49+
- P3H2
50+
- P3H3
51+
- P3H4
52+
- KLK15
53+
- MTHFR
54+
55+
56+
# Parameters for API Request Headers
57+
# ----------------------------------
58+
#
59+
# LOVDTools includes this information in every API request to the Global
60+
# Variome–shared LOVD instance. This helps the instance's web administrators to better
61+
# understand how researchers actually use the API in practice and, more importantly,
62+
# to secure the database's server(s) against malicious requests.
63+
#
64+
email: "contact@example.com" # REQUIRED, as per LOVD docs
65+
user_agent: "My-Organization/1.0 (my research purpose)" # Optional
66+
...

acquisition.yaml

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/lovd/client.py

Lines changed: 100 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
===========
44
55
This module defines and implements an interface for querying the Global
6-
Variome shared Leiden Open Variants Database (LOVD) instance.
6+
Variome–shared Leiden Open Variants Database (LOVD) instance.
77
88
"""
99
from __future__ import annotations
1010

11+
import json
1112
import logging
1213
import os
1314
import time
@@ -21,27 +22,29 @@
2122

2223
from lovd.constants import EMAIL, TARGET_GENE_SYMBOLS, USER_AGENT_STRING
2324

25+
26+
# ─── type aliases ───────────────────────────────────────────────────────────────── ✦ ─
27+
#
28+
JSONDecodeError: TypeAlias = json.JSONDecodeError
29+
Logger: TypeAlias = logging.Logger
30+
PathLike: TypeAlias = os.PathLike
31+
YAMLError: TypeAlias = yaml.YAMLError
32+
2433
# ─── logger setup ───────────────────────────────────────────────────────────────── ✦ ─
2534
#
2635
# This `logging.Logger` instance is not used for logging responses to the
2736
# LOVD client's API requests. That logger is defined on `LOVDClient` as
2837
# its `.logger` attribute.
2938
#
30-
logging.basicConfig(
31-
level="INFO",
32-
format="%(name)s – %(message)s"
33-
)
39+
logging.basicConfig(level="INFO", format="%(name)s – %(message)s")
3440
logger = logging.getLogger(__name__)
3541
logger.info("Logger setup complete.")
3642

3743

38-
# ─── type aliases ───────────────────────────────────────────────────────────────── ✦ ─
39-
#
40-
PathLike: TypeAlias = os.PathLike
41-
42-
4344
# ─── get environment variables from `.env` ──────────────────────────────────────── ✦ ─
4445
#
46+
# TODO: Assess whether we need both `acquisition.yaml` and `.env` for configuration.
47+
#
4548
try:
4649
load_dotenv()
4750
except FileNotFoundError as e:
@@ -52,15 +55,15 @@
5255

5356
# ─── rate limiting ──────────────────────────────────────────────────────────────── ✦ ─
5457
#
55-
# The [LOVD 3.0 user manual](https://databases.lovd.nl/shared/docs/manual.html)
56-
# stipulates that users ought to limit their API request rates "to a maximum of
57-
# 5 per second per server/domain," which translates to a fixed rate of one
58-
# API request per 0.2 seconds.
58+
#: The [LOVD 3.0 user manual](https://databases.lovd.nl/shared/docs/manual.html)
59+
#: stipulates that users ought to limit their API request rates "to a maximum of
60+
#: 5 per second per server/domain," which translates to a fixed rate of one
61+
#: API request per 0.2 seconds.
5962
#
6063
LOVD_RATE_LIMIT: int = 5
6164

6265

63-
# ─── configuration loading ──────────────────────────────────────────────────────── ✦ ─
66+
# | loaders
6467
#
6568
def load_acquisition_config(config_path: PathLike | None = None) -> dict[str, Any]:
6669
"""
@@ -117,18 +120,89 @@ def load_acquisition_config(config_path: PathLike | None = None) -> dict[str, An
117120
try:
118121
with open(config_file, "r", encoding="utf-8") as f:
119122
user_config = yaml.safe_load(f) or {}
120-
123+
121124
# Merge the user's config specifications with defaults.
122125
config.update(user_config)
123126
logger.info(f"Loaded configuration from {config_file}")
124127
break
125128
except (yaml.YAMLError, OSError) as e:
126129
logger.warning(f"Failed to load config from {config_file}: {e}")
127130
continue
128-
131+
129132
return config
130133

131134

135+
def _read_variant_file(fp: Path) -> dict[str, Any]:
    """Decode one JSON file of variant records, logging and re-raising failures."""
    try:
        # JSON interchange text is UTF-8; don't depend on the platform's locale.
        with open(fp, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        logger.error("Failed to decode JSON.")
        raise
    except OSError:
        logger.error("Encountered an unhandled I/O exception.")
        raise

    # The caller merges records by key, so anything other than a JSON object
    # (e.g. a top-level array) cannot be interpreted.
    if not isinstance(data, dict):
        logger.error("Failed to decode JSON.")
        raise ValueError(
            f"Expected a JSON object at the top level of `{fp}`, "
            f"but found `{type(data).__name__}`."
        )

    logger.info(f"Successfully decoded JSON data at {fp}.")
    return data


def load_variants(
    filepath: str | PathLike | None = None
) -> dict[str, Any]:
    """
    Load variants from a given filepath.

    Parameters
    ----------
    filepath : str | PathLike
        A string or path-like object representing the file or directory
        from which to load a corpus of JSON-serialized variant records.
        A leading ``~`` is expanded to the user's home directory. When a
        directory is given, every regular file directly inside it whose
        suffix is ``.json`` is read in sorted order, and records from
        later files replace records from earlier files that share the
        same top-level key. Although the signature defaults to ``None``,
        no fallback location exists, so a value must be supplied.

    Returns
    -------
    A dictionary containing the variant records loaded from ``filepath``.
    The dictionary is empty if ``filepath`` is a directory that contains
    no ``.json`` files.

    Raises
    ------
    ValueError
        If ``filepath`` is ``None``, if it points to a file whose suffix
        is not ``.json``, or if a file's top-level JSON value is not an
        object.
    FileNotFoundError
        If ``filepath`` does not exist.
    json.JSONDecodeError
        If a file does not contain valid JSON.
    OSError
        If a file cannot be opened or read.

    """
    if filepath is None:
        logger.error("No filepath was provided from which to load variant records.")
        raise ValueError("The `filepath` argument is required, but received `None`.")

    # Normalize every accepted input (str, Path, or any other os.PathLike)
    # to a concrete `Path` so the filesystem checks below are always available.
    path = Path(filepath).expanduser()

    if not path.exists():
        logger.error(f"Failed to load variant records from `{path}`.")
        raise FileNotFoundError("Unable to locate the provided filepath.")

    variants: dict[str, Any] = {}

    if path.is_dir():
        logger.info(f"Iterating over directory `{path}`...")
        # Sort the entries so that the winner of a key collision between two
        # files does not depend on the filesystem's listing order.
        for fp in sorted(path.iterdir()):
            if fp.suffix != ".json" or not fp.is_file():
                continue
            logger.info(f"Found JSON file `{fp}`; extracting data...")
            variants.update(_read_variant_file(fp))
        logger.info(f"Successfully iterated over directory `{path}.")
        return variants

    if path.suffix != ".json":
        logger.error("Received a non-JSON file reference as input.")
        raise ValueError("The `filepath` argument must point either to a non-empty "
                         "directory or a JSON file.")

    logger.info(f"Extracting data from `{path}...")
    variants.update(_read_variant_file(path))

    # If we've made it this far, `variants` should be a dictionary that contains
    # gene symbols mapped to their variant records.
    return variants
204+
205+
132206
# ─── interface ──────────────────────────────────────────────────────────────────── ✦ ─
133207
#
134208
class LOVDClient:
@@ -138,7 +212,7 @@ class LOVDClient:
138212

139213
def __init__(
140214
self,
141-
config_path: PathLike | None = None,
215+
config: dict | PathLike | None = None,
142216
email: str | None = None,
143217
target_gene_symbols: list[str] | None = None,
144218
user_agent: str | None = None,
@@ -154,7 +228,7 @@ def __init__(
154228
155229
Parameters
156230
----------
157-
config_path : PathLike, optional
231+
config : dict | PathLike, optional
158232
A path-like object representing the acquisition configuration
159233
filepath. If left unspecified, this constructor searches for
160234
``acquisition.yaml`` first in the current working directory
@@ -180,7 +254,10 @@ def __init__(
180254
progress indicator during execution. Defaults to ``None``.
181255
182256
"""
183-
self.config = load_acquisition_config(config_path)
257+
if isinstance(config, dict):
258+
self.config = config
259+
else:
260+
self.config = load_acquisition_config(config)
184261

185262
# ─── configuration ────────────────────────────────────────────────────────────
186263
#
@@ -500,7 +577,7 @@ def get_variants_for_genes(
500577
suffix_parts = []
501578
if search_terms:
502579
suffix_parts.append("filtered")
503-
580+
504581
suffix = (
505582
f"_{'_'.join(suffix_parts)}_variants.json"
506583
if suffix_parts
@@ -673,7 +750,7 @@ def get_lovd_variants(
673750
>>> variants = get_lovd_variants(config_path="my_project.yaml")
674751
675752
"""
676-
client = LOVDClient(config_path=config_path)
753+
client = LOVDClient(config=config_path)
677754

678755
# Use config values as defaults, overriding with any specified parameters.
679756
target_gene_symbols = genes or client.config.get("target_gene_symbols")
@@ -761,7 +838,7 @@ def variants_to_dataframe(
761838
try:
762839
from tqdm import tqdm
763840
items = tqdm(variants_data.items())
764-
except ImportError as e:
841+
except ImportError:
765842
logger.warning(
766843
"`tqdm` does not appear to be installed, so `.with_progress()`\n"
767844
"has no effect. To suppress this warning, run the following\n"

0 commit comments

Comments
 (0)