33===========
44
55This module defines and implements an interface for querying the Global
6- Variome shared Leiden Open Variants Database (LOVD) instance.
6+ Variome– shared Leiden Open Variants Database (LOVD) instance.
77
88"""
99from __future__ import annotations
1010
11+ import json
1112import logging
1213import os
1314import time
2122
2223from lovd .constants import EMAIL , TARGET_GENE_SYMBOLS , USER_AGENT_STRING
2324
25+
26+ # ─── type aliases ───────────────────────────────────────────────────────────────── ✦ ─
27+ #
28+ JSONDecodeError : TypeAlias = json .JSONDecodeError
29+ Logger : TypeAlias = logging .Logger
30+ PathLike : TypeAlias = os .PathLike
31+ YAMLError : TypeAlias = yaml .YAMLError
32+
2433# ─── logger setup ───────────────────────────────────────────────────────────────── ✦ ─
2534#
2635# This `logging.Logger` instance is not used for logging responses to the
2736# LOVD client's API requests. That logger is defined on `LOVDClient` as
2837# its `.logger` attribute.
2938#
30- logging .basicConfig (
31- level = "INFO" ,
32- format = "%(name)s – %(message)s"
33- )
39+ logging .basicConfig (level = "INFO" , format = "%(name)s – %(message)s" )
3440logger = logging .getLogger (__name__ )
3541logger .info ("Logger setup complete." )
3642
3743
38- # ─── type aliases ───────────────────────────────────────────────────────────────── ✦ ─
39- #
40- PathLike : TypeAlias = os .PathLike
41-
42-
4344# ─── get environment variables from `.env` ──────────────────────────────────────── ✦ ─
4445#
46+ # TODO: Assess whether loading configurations from both `acquisition.yaml` and `.env` is necessary.
47+ #
4548try :
4649 load_dotenv ()
4750except FileNotFoundError as e :
5255
5356# ─── rate limiting ──────────────────────────────────────────────────────────────── ✦ ─
5457#
55- # The [LOVD 3.0 user manual](https://databases.lovd.nl/shared/docs/manual.html)
56- # stipulates that users ought to limit their API request rates "to a maximum of
57- # 5 per second per server/domain," which translates to a fixed rate of one
58- # API request per 0.2 seconds.
58+ #: The [LOVD 3.0 user manual](https://databases.lovd.nl/shared/docs/manual.html)
59+ #: stipulates that users ought to limit their API request rates "to a maximum of
60+ #: 5 per second per server/domain," which translates to a fixed rate of one
61+ #: API request per 0.2 seconds.
5962#
6063LOVD_RATE_LIMIT : int = 5
6164
6265
63- # ─── configuration loading ──────────────────────────────────────────────────────── ✦ ─
66+ # | loaders
6467#
6568def load_acquisition_config (config_path : PathLike | None = None ) -> dict [str , Any ]:
6669 """
@@ -117,18 +120,89 @@ def load_acquisition_config(config_path: PathLike | None = None) -> dict[str, An
117120 try :
118121 with open (config_file , "r" , encoding = "utf-8" ) as f :
119122 user_config = yaml .safe_load (f ) or {}
120-
123+
121124 # Merge the user's config specifications with defaults.
122125 config .update (user_config )
123126 logger .info (f"Loaded configuration from { config_file } " )
124127 break
125128 except (yaml .YAMLError , OSError ) as e :
126129 logger .warning (f"Failed to load config from { config_file } : { e } " )
127130 continue
128-
131+
129132 return config
130133
131134
135+ def load_variants (
136+ filepath : str | PathLike | None = None
137+ ) -> dict [str , Any ]:
138+ """
139+ Load variants from a given filepath.
140+
141+ Parameters
142+ ----------
143+ filepath : str | PathLike, optional
144+ A a string or path-like object representing the file or directory
145+ from which to load a corpus of JSON-serialized variant records.
146+
147+ Returns
148+ -------
149+ A dictionary containing the variant records loaded from ``filepath``.
150+
151+ """
152+ if isinstance (filepath , str ) and filepath .startswith ("~" ):
153+ filepath : PathLike = Path (filepath ).expanduser ()
154+ elif isinstance (filepath , str ):
155+ filepath = Path (filepath )
156+ else :
157+ pass
158+
159+ if not filepath .exists ():
160+ logger .error (f"Failed to load variant records from `{ filepath } `." )
161+ raise FileNotFoundError ("Unable to locate the provided filepath." )
162+
163+ variants : dict [str , Any ] = {}
164+
165+ if filepath .is_dir ():
166+ logger .info (f"Iterating over directory `{ filepath } `..." )
167+ for fp in filepath .iterdir ():
168+ if fp .suffix == ".json" :
169+ logger .info (f"Found JSON file `{ fp } `; extracting data..." )
170+ with open (fp , "r" ) as f :
171+ try :
172+ data : dict [str , Any ] = json .load (f )
173+ for sym , dat in data .items ():
174+ variants [sym ] = dat
175+ logger .info (f"Successfully decoded JSON data at { fp } ." )
176+ except JSONDecodeError as e :
177+ logger .error ("Failed to decode JSON." )
178+ raise e
179+ else :
180+ logger .info (f"Successfully iterated over directory `{ filepath } ." )
181+ else :
182+ if not filepath .suffix == ".json" :
183+ logger .error ("Received a non-JSON file reference as input." )
184+ raise ValueError ("The `filepath` argument must point either to a non-empty "
185+ "directory or a JSON file." )
186+ else :
187+ logger .info (f"Extracting data from `{ filepath } ..." )
188+ with open (filepath , "r" ) as f :
189+ try :
190+ data : dict [str , Any ] = json .load (f )
191+ for sym , dat in data .items ():
192+ variants [sym ] = dat
193+ logger .info (f"Successfully decoded JSON data at { filepath } ." )
194+ except JSONDecodeError as e :
195+ logger .error ("Failed to decode JSON." )
196+ raise e
197+ except IOError as e :
198+ logger .error ("Encountered an unhandled I/O exception." )
199+ raise e
200+
201+ # If we've made it this far, `variants` should be a dictionary that contains
202+ # gene symbols mapped to their variant records.
203+ return variants
204+
205+
132206# ─── interface ──────────────────────────────────────────────────────────────────── ✦ ─
133207#
134208class LOVDClient :
@@ -138,7 +212,7 @@ class LOVDClient:
138212
139213 def __init__ (
140214 self ,
141- config_path : PathLike | None = None ,
215+ config : dict | PathLike | None = None ,
142216 email : str | None = None ,
143217 target_gene_symbols : list [str ] | None = None ,
144218 user_agent : str | None = None ,
@@ -154,7 +228,7 @@ def __init__(
154228
155229 Parameters
156230 ----------
157- config_path : PathLike, optional
231+ config_path : dict | PathLike, optional
158232 A path-like object representing the acquisition configuration
159233 filepath. If left unspecified, this constructor searches for
160234 ``acquisition.yaml`` first in the current working directory
@@ -180,7 +254,10 @@ def __init__(
180254 progress indicator during execution. Defaults to ``None``.
181255
182256 """
183- self .config = load_acquisition_config (config_path )
257+ if isinstance (config , dict ):
258+ self .config = config
259+ else :
260+ self .config = load_acquisition_config (config )
184261
185262 # ─── configuration ────────────────────────────────────────────────────────────
186263 #
@@ -500,7 +577,7 @@ def get_variants_for_genes(
500577 suffix_parts = []
501578 if search_terms :
502579 suffix_parts .append ("filtered" )
503-
580+
504581 suffix = (
505582 f"_{ '_' .join (suffix_parts )} _variants.json"
506583 if suffix_parts
@@ -673,7 +750,7 @@ def get_lovd_variants(
673750 >>> variants = get_lovd_variants(config_path="my_project.yaml")
674751
675752 """
676- client = LOVDClient (config_path = config_path )
753+ client = LOVDClient (config = config_path )
677754
678755 # Use config values as defaults, overriding with any specified parameters.
679756 target_gene_symbols = genes or client .config .get ("target_gene_symbols" )
@@ -761,7 +838,7 @@ def variants_to_dataframe(
761838 try :
762839 from tqdm import tqdm
763840 items = tqdm (variants_data .items ())
764- except ImportError as e :
841+ except ImportError :
765842 logger .warning (
766843 "`tqdm` does not appear to be installed, so `.with_progress()`\n "
767844 "has no effect. To suppress this warning, run the following\n "
0 commit comments