Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
[package]
authors = ["Peter Robinson, Rouven Router, Patrick Nairne"]
description = "A Rust library for shared tools between different Phenopacket extraction programs."
authors = ["Patrick Nairne", "Rouven Reuter"]
description = "A Rust library for retrieving data from the VariantValidator and HGNC APIs for Phenopackets."
edition = "2024"
homepage = "https://robinsongroup.github.io/"
license = "MIT"
name = "pivot"
version = "0.1.4"
license-file = "LICENSE"
name = "pivotal"
version = "0.1.6"
keywords = ["variant", "validator", "hgnc", "hgvs", "phenopacket"]
readme = "README.md"
repository = "https://github.com/psnairne/PIVOT"


[dependencies]
phenopackets = { version = "0.2.2-post1", features = ["serde"] }
rstest = "0.26.1"
serde = "1.0.228"
thiserror = "2.0.17"
ratelimit = "0.10.0"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## PIVOT
A Rust library for getting data from VariantValidator.
A Rust library for getting data from VariantValidator and HGNC.

## License
This project is licensed under the MIT License.
9 changes: 4 additions & 5 deletions src/caching/redb_cacher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,16 @@ impl<T: Cacheable> Default for RedbCacher<T> {
fn default() -> Self {
let pkg_name = env!("CARGO_PKG_NAME");

let pivot_cache_dir = ProjectDirs::from("", "", pkg_name)
let pivotal = ProjectDirs::from("", "", pkg_name)
.map(|project_dir| project_dir.cache_dir().to_path_buf())
.or_else(|| home_dir().map(|home| home.join(pkg_name)))
.unwrap_or_else(|| panic!("Could not find cache directory or home directory."));

if !pivot_cache_dir.exists() {
fs::create_dir_all(&pivot_cache_dir)
.expect("Failed to create default cache directory.");
if !pivotal.exists() {
fs::create_dir_all(&pivotal).expect("Failed to create default cache directory.");
}

RedbCacher::new(pivot_cache_dir.join(type_name::<T>()))
RedbCacher::new(pivotal.join(type_name::<T>()))
}
}

Expand Down
30 changes: 26 additions & 4 deletions src/hgnc/cached_hgnc_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ use crate::hgnc::json_schema::GeneDoc;
use crate::hgnc::traits::HGNCData;
use std::fmt::{Debug, Formatter};
use std::path::PathBuf;
use std::sync::{Mutex, MutexGuard, OnceLock};

/// Process-wide slot holding the HGNC cache lock; empty until the first access.
static HGNC_CACHE_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Returns the shared HGNC cache mutex, creating it the first time it is requested.
///
/// Every call yields a reference to the same `Mutex<()>`. The mutex carries no data;
/// callers hold its guard to serialise a critical section.
fn hgnc_cache_mutex() -> &'static Mutex<()> {
    HGNC_CACHE_LOCK.get_or_init(Mutex::<()>::default)
}

/// Locks `mutex` and hands back its guard.
///
/// # Errors
/// If `Mutex::lock` fails (the lock is poisoned), the failure is rendered to text and
/// returned as `HGNCError::MutexError`.
fn lock_mutex(mutex: &'_ Mutex<()>) -> Result<MutexGuard<'_, ()>, HGNCError> {
    match mutex.lock() {
        Ok(guard) => Ok(guard),
        Err(poisoned) => Err(HGNCError::MutexError(poisoned.to_string())),
    }
}

pub struct CachedHGNCClient {
cacher: RedbCacher<GeneDoc>,
Expand All @@ -14,13 +27,22 @@ pub struct CachedHGNCClient {

impl HGNCData for CachedHGNCClient {
fn request_gene_data(&self, query: GeneQuery) -> Result<GeneDoc, HGNCError> {
let cache = self.cacher.open_cache()?;
if let Some(gene_doc) = self.cacher.find_cache_entry(query.inner(), &cache) {
return Ok(gene_doc);
{
let _guard = lock_mutex(hgnc_cache_mutex())?;
let cache = self.cacher.open_cache()?;
if let Some(gene_doc) = self.cacher.find_cache_entry(query.inner(), &cache) {
return Ok(gene_doc);
}
}

let doc = self.hgnc_client.request_gene_data(query)?;
self.cacher.cache_object(doc.clone(), &cache)?;

{
let _guard = lock_mutex(hgnc_cache_mutex())?;
let cache = self.cacher.open_cache()?;
self.cacher.cache_object(doc.clone(), &cache)?;
}

Ok(doc)
}
}
Expand Down
4 changes: 4 additions & 0 deletions src/hgnc/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ pub enum HGNCError {
CacheTable(#[from] TableError),
#[error(transparent)]
Request(#[from] reqwest::Error),
#[error("Something went wrong when using Mutex: {0}")]
MutexError(String),
#[error("HgncAPI returned an error on {attempts} attempts to retrieve data about gene {gene}")]
HgncAPI { gene: String, attempts: usize },
}
61 changes: 39 additions & 22 deletions src/hgnc/hgnc_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,60 @@ use crate::hgnc::traits::HGNCData;
use ratelimit::Ratelimiter;
use reqwest::blocking::Client;
use std::fmt::{Debug, Formatter};
use std::sync::OnceLock;
use std::thread::sleep;
use std::time::Duration;

/// Process-wide slot holding the HGNC rate limiter; empty until the first access.
static HGNC_RATE_LIMITER: OnceLock<Ratelimiter> = OnceLock::new();

/// Returns the shared HGNC rate limiter, building it on the first call.
///
/// The limiter is built with `Ratelimiter::builder(10, 1100 ms)` and `max_tokens(10)`.
///
/// # Panics
/// Panics with "Building rate limiter failed" if the builder rejects that configuration.
fn hgnc_rate_limiter() -> &'static Ratelimiter {
    HGNC_RATE_LIMITER.get_or_init(|| {
        let interval = Duration::from_millis(1100);
        let builder = Ratelimiter::builder(10, interval).max_tokens(10);
        builder.build().expect("Building rate limiter failed")
    })
}

pub struct HGNCClient {
rate_limiter: Ratelimiter,
attempts: usize,
api_url: String,
client: Client,
}

impl HGNCClient {
pub fn new(rate_limiter: Ratelimiter, api_url: String) -> Self {
pub fn new(attempts: usize, api_url: String) -> Self {
HGNCClient {
rate_limiter,
attempts,
api_url,
client: Client::new(),
}
}

fn fetch_request(&self, url: String) -> Result<Vec<GeneDoc>, HGNCError> {
if let Err(duration) = self.rate_limiter.try_wait() {
sleep(duration);
fn fetch_request(&self, url: &str, query: &GeneQuery) -> Result<Vec<GeneDoc>, HGNCError> {
for _ in 0..self.attempts {
if let Err(duration) = hgnc_rate_limiter().try_wait() {
sleep(duration);
}
let response = self
.client
.get(url)
.header("User-Agent", "PIVOT")
.header("Accept", "application/json")
.send();

if let Ok(response) = response
&& response.status().is_success()
{
let gene_response = response.json::<GeneResponse>()?;
return Ok(gene_response.response.docs);
}
}
let response = self
.client
.get(url.clone())
.header("User-Agent", "PIVOT")
.header("Accept", "application/json")
.send()?;

let gene_response = response.json::<GeneResponse>()?;

Ok(gene_response.response.docs)
Err(HGNCError::HgncAPI {
gene: query.inner().to_string(),
attempts: self.attempts,
})
}
}

Expand All @@ -46,7 +68,7 @@ impl HGNCData for HGNCClient {
GeneQuery::Symbol(symbol) => format!("{}fetch/symbol/{}", self.api_url, symbol),
GeneQuery::HgncId(id) => format!("{}fetch/hgnc_id/{}", self.api_url, id),
};
let docs = self.fetch_request(fetch_url)?;
let docs = self.fetch_request(&fetch_url, &query)?;

if docs.len() == 1 {
Ok(docs.first().unwrap().clone())
Expand All @@ -62,12 +84,7 @@ impl HGNCData for HGNCClient {

impl Default for HGNCClient {
fn default() -> Self {
let rate_limiter = Ratelimiter::builder(10, Duration::from_secs(1))
.max_tokens(10)
.build()
.expect("Building rate limiter failed");

HGNCClient::new(rate_limiter, "https://rest.genenames.org/".to_string())
HGNCClient::new(3, "https://rest.genenames.org/".to_string())
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/hgnc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
//! ## HGNCClient
//!
//! ```rust
//! use pivot::hgnc::{HGNCClient, HGNCData, GeneQuery};
//! use pivotal::hgnc::{HGNCClient, HGNCData, GeneQuery};
//!
//! let client = HGNCClient::default();
//! let gene_symbol = client.request_gene_symbol(GeneQuery::from("HGNC:13089")).unwrap();
Expand All @@ -54,7 +54,7 @@
//! ## CachedHGNCClient
//!
//! ```rust
//! use pivot::hgnc::{HGNCClient, HGNCData, GeneQuery, CachedHGNCClient};
//! use pivotal::hgnc::{HGNCClient, HGNCData, GeneQuery, CachedHGNCClient};
//!
//! let temp_dir = tempfile::tempdir().expect("Failed to create temporary directory");
//! let cache_file_path = temp_dir.path().join("cache.hgnc");
Expand Down
34 changes: 29 additions & 5 deletions src/hgvs/cached_hgvs_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ use crate::hgvs::hgvs_client::HGVSClient;
use crate::hgvs::hgvs_variant::HgvsVariant;
use crate::hgvs::traits::HGVSData;
use std::path::PathBuf;
use std::sync::{Mutex, MutexGuard, OnceLock};

/// Process-wide slot holding the HGVS cache lock; empty until the first access.
static HGVS_CACHE_LOCK: OnceLock<Mutex<()>> = OnceLock::new();

/// Returns the shared HGVS cache mutex, creating it the first time it is requested.
///
/// Every call yields a reference to the same `Mutex<()>`. The mutex carries no data;
/// callers hold its guard to serialise a critical section.
fn hgvs_cache_mutex() -> &'static Mutex<()> {
    HGVS_CACHE_LOCK.get_or_init(Mutex::<()>::default)
}

/// Locks `mutex` and hands back its guard.
///
/// # Errors
/// If `Mutex::lock` fails (the lock is poisoned), the failure is rendered to text and
/// returned as `HGVSError::MutexError`.
fn lock_mutex(mutex: &'_ Mutex<()>) -> Result<MutexGuard<'_, ()>, HGVSError> {
    match mutex.lock() {
        Ok(guard) => Ok(guard),
        Err(poisoned) => Err(HGVSError::MutexError(poisoned.to_string())),
    }
}

#[derive(Debug)]
pub struct CachedHGVSClient {
Expand Down Expand Up @@ -37,16 +50,27 @@ impl CachedHGVSClient {

impl HGVSData for CachedHGVSClient {
fn request_and_validate_hgvs(&self, unvalidated_hgvs: &str) -> Result<HgvsVariant, HGVSError> {
let cache = self.cacher.open_cache()?;
if let Some(hgvs_variant) = self.cacher.find_cache_entry(unvalidated_hgvs, &cache) {
return Ok(hgvs_variant);
{
let _guard = lock_mutex(hgvs_cache_mutex())?;

let cache = self.cacher.open_cache()?;
if let Some(hgvs_variant) = self.cacher.find_cache_entry(unvalidated_hgvs, &cache) {
return Ok(hgvs_variant);
}
}

let hgvs_variant = self
.hgvs_client
.request_and_validate_hgvs(unvalidated_hgvs)?;
self.cacher.cache_object(hgvs_variant.clone(), &cache)?;
Ok(hgvs_variant.clone())

{
let _guard = lock_mutex(hgvs_cache_mutex())?;

let cache = self.cacher.open_cache()?;
self.cacher.cache_object(hgvs_variant.clone(), &cache)?;
}

Ok(hgvs_variant)
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/hgvs/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ pub enum HGVSError {
VariantValidatorAPI { hgvs: String, attempts: usize },
#[error("VariantValidator response for {hgvs} had an unexpected format: {format_issue}")]
VariantValidatorResponseUnexpectedFormat { hgvs: String, format_issue: String },
#[error("VariantValidator fetch request for {hgvs} failed. Error: {err}.")]
FetchRequest { hgvs: String, err: String },
#[error(transparent)]
CacheDatabase(#[from] DatabaseError),
#[error(transparent)]
Expand All @@ -79,4 +77,6 @@ pub enum HGVSError {
CacheStorage(#[from] StorageError),
#[error(transparent)]
CacherError(#[from] CacherError),
#[error("Something went wrong when using Mutex: {0}")]
MutexError(String),
}
Loading
Loading