Skip to content

Commit e965aa2

Browse files
committed
refactoring cli app
1 parent b7b415b commit e965aa2

File tree

12 files changed

+212
-151
lines changed

12 files changed

+212
-151
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.5.17"
3+
version = "0.5.19"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"

bin/commands/compare.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use clap::{ArgMatches};
2+
3+
/// Build the `compare` subcommand: compares two cohorts and exports the
/// comparison to an Excel file.
pub fn command() -> clap::Command {
    let mut cmd = clap::Command::new("compare")
        .about("Compare two cohorts and export to Excel");
    // The four mandatory options all follow the same name == long-flag pattern.
    for name in ["cohort1", "cohort2", "output", "hpo"] {
        cmd = cmd.arg(clap::Arg::new(name).long(name).required(true));
    }
    cmd.arg(
        clap::Arg::new("threshold")
            .long("threshold")
            .default_value("1"),
    )
}
16+
17+
/// Handler for the `compare` subcommand.
///
/// Delegates to the `excel` handler: both subcommands declare the same
/// arguments and produce the same Excel comparison output.
#[cfg(feature = "excel_export")]
pub fn handle(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
    crate::commands::excel::handle(sub_matches)
}

/// Fallback used when the binary is compiled without the `excel_export`
/// feature: prints a notice to stderr and returns `Ok`.
#[cfg(not(feature = "excel_export"))]
pub fn handle(_sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
    eprintln!("This binary was built without the `excel_export` feature");
    Ok(())
}

bin/commands/etl.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use clap::{Arg, ArgMatches};
2+
use ga4ghphetools::dto::etl_dto::EtlDto;
3+
4+
/// Returns the `clap::Command` for ETL
5+
pub fn command() -> clap::Command {
6+
clap::Command::new("etl")
7+
.about("Test converting an EtlDto to CohortData")
8+
.arg(Arg::new("input").short('i').long("input").required(true))
9+
.arg(Arg::new("hpo").short('o').long("hpo").required(true))
10+
}
11+
12+
/// Handler for the subcommand
13+
pub fn handle(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
14+
let input = sub_matches.get_one::<String>("input").unwrap();
15+
let hpo_path = sub_matches.get_one::<String>("hpo").unwrap();
16+
let hpo = crate::load_hpo(hpo_path)?;
17+
18+
let contents = std::fs::read_to_string(input)
19+
.map_err(|e| format!("Failed to read file: {}", e))?;
20+
21+
let dto: EtlDto = serde_json::from_str(&contents)
22+
.map_err(|e| format!("Failed to deserialize JSON: {}", e))?;
23+
24+
let cohort = ga4ghphetools::etl::get_cohort_data_from_etl_dto(hpo.clone(), dto)?;
25+
let json = serde_json::to_string_pretty(&cohort)?;
26+
println!("{}", json);
27+
28+
Ok(())
29+
}

bin/commands/excel.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
use clap::{Arg, ArgMatches};
2+
3+
4+
#[cfg(feature = "excel_export")]
5+
use ga4ghphetools::export::output_excel_comparison;
6+
7+
/// Returns the `clap::Command` for this subcommand
8+
pub fn command() -> clap::Command {
9+
clap::Command::new("excel")
10+
.about("Compare two cohorts and export to Excel")
11+
.arg(Arg::new("cohort1").long("cohort1").required(true))
12+
.arg(Arg::new("cohort2").long("cohort2").required(true))
13+
.arg(Arg::new("output").long("output").required(true))
14+
.arg(Arg::new("hpo").long("hpo").required(true))
15+
.arg(
16+
Arg::new("threshold")
17+
.long("threshold")
18+
.default_value("1"),
19+
)
20+
}
21+
22+
/// Handler for the `excel` subcommand.
///
/// Reads the four required options plus `--threshold`, loads the HPO
/// ontology, and writes an Excel comparison of the two cohorts to `--output`.
///
/// # Errors
/// Fails if `--threshold` does not parse as `usize`, the ontology cannot be
/// loaded, or the comparison/export itself fails.
#[cfg(feature = "excel_export")]
pub fn handle(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
    // The four options are `required(true)`, so these unwraps cannot fail.
    let cohort_1 = sub_matches.get_one::<String>("cohort1").unwrap();
    let cohort_2 = sub_matches.get_one::<String>("cohort2").unwrap();
    let output = sub_matches.get_one::<String>("output").unwrap();
    let hpo_path = sub_matches.get_one::<String>("hpo").unwrap();
    // `threshold` has a default value ("1"), so `unwrap` is safe; parsing to
    // usize may still fail for a non-numeric user-supplied value.
    let threshold: usize = sub_matches.get_one::<String>("threshold").unwrap().parse()?;
    let hpo = crate::load_hpo(hpo_path)?;

    output_excel_comparison(cohort_1, cohort_2, output, hpo, threshold).map_err(|e| e.into())
}

/// Fallback used when the binary is compiled without the `excel_export`
/// feature: prints a notice to stderr and returns `Ok`.
#[cfg(not(feature = "excel_export"))]
pub fn handle(_sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
    eprintln!("This binary was built without the `excel_export` feature");
    Ok(())
}

bin/commands/json.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use crate::commands::util::extract_file_name;
2+
3+
use clap::ArgMatches;
4+
5+
/// Build the `json` subcommand: runs Q/C on a Cohort JSON file.
pub fn command() -> clap::Command {
    clap::Command::new("json")
        .about("Q/C Cohort JSON file")
        // The handler reads this argument unconditionally, so let clap
        // enforce its presence instead of failing later with an `expect`.
        .arg(clap::Arg::new("cohort").short('c').long("cohort").required(true))
        .arg(clap::Arg::new("hpo").short('o').long("hpo").required(true))
}
11+
12+
/// Handler for the `json` subcommand.
///
/// Loads the cohort JSON file given by `--cohort` and the HPO ontology given
/// by `--hpo`, runs the Q/C assessment, and reports the result on
/// stdout (success) or stderr (failure).
///
/// # Errors
/// Returns an error if an argument is missing, the ontology cannot be
/// constructed, or the cohort file cannot be loaded.
pub fn handle(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
    // BUG FIX: the argument is registered under the id "cohort" in
    // `command()`; the previous lookup under "input" could never succeed.
    let json_input_path = sub_matches
        .get_one::<String>("cohort")
        .ok_or("Missing required --cohort argument")?;
    let hpo_path = sub_matches
        .get_one::<String>("hpo")
        .ok_or("Missing required --hpo argument")?;
    // Propagate failures instead of panicking via `expect`.
    let hpo = crate::load_hpo(hpo_path)?;
    let cohort = ga4ghphetools::factory::load_json_cohort(json_input_path)
        .map_err(|e| format!("Could not load Cohort JSON file: {e:?}"))?;
    let cohort_file_name = extract_file_name(json_input_path);
    match ga4ghphetools::factory::qc_assessment(hpo, &cohort) {
        Ok(_) => println!("No Q/C issues identified for {cohort_file_name}."),
        // `eprintln!` (was `eprint!`) so the message ends with a newline.
        Err(e) => eprintln!("Error for {cohort_file_name}: {e}"),
    }

    Ok(())
}
25+

bin/commands/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pub mod excel;
2+
pub mod etl;
3+
pub mod compare;
4+
pub mod json;
5+
pub mod removeterm;
6+
mod util;

bin/commands/removeterm.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
use std::fs::File;
2+
3+
use clap::{Arg, ArgMatches};
4+
use crate::commands::util::extract_file_name;
5+
6+
pub fn command() -> clap::Command {
7+
clap::Command::new("remove-term")
8+
.about("Remove HPO Term and its annotations from Cohort Data file")
9+
.arg(Arg::new("cohort").short('c').long("cohort").required(true))
10+
.arg(Arg::new("hpo-id").short('i').long("id").required(true))
11+
}
12+
13+
14+
pub fn handle(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
15+
let input_json = sub_matches.get_one::<String>("cohort").unwrap();
16+
let hpo_id = sub_matches.get_one::<String>("hpo-id").unwrap();
17+
println!("Remove HPO Term {hpo_id} from Cohort {input_json}");
18+
let cohort = ga4ghphetools::factory::load_json_cohort(input_json).expect("Could not load Cohort JSON file");
19+
let modified_cohort = cohort.remove_hpo_column(hpo_id)?;
20+
let fname = extract_file_name(input_json);
21+
let outname = format!("modified-{fname}");
22+
let json = serde_json::to_string_pretty(&modified_cohort)?;
23+
println!("{}", json);
24+
let file = File::create(outname)?;
25+
let writer = std::io::BufWriter::new(file);
26+
27+
serde_json::to_writer_pretty(writer, &modified_cohort)?;
28+
29+
30+
Ok(())
31+
}

bin/commands/util.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use std::path::Path;
2+
3+
/// Get the file name at the end of the path.
/// If the path has no usable (UTF-8) file name, return the original path.
pub(crate) fn extract_file_name(input_path: &str) -> String {
    Path::new(input_path)
        .file_name()
        .and_then(|name| name.to_str())
        .map(str::to_string)
        .unwrap_or_else(|| input_path.to_string())
}

bin/main.rs

Lines changed: 21 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -1,161 +1,34 @@
1-
// src/main.rs
2-
use clap::{Arg, ArgMatches, Command};
3-
use ga4ghphetools::dto::etl_dto::EtlDto;
1+
mod commands;
2+
3+
use clap::Command;
44
use ontolius::{io::OntologyLoaderBuilder, ontology::csr::FullCsrOntology};
55
use std::sync::Arc;
66

7-
8-
#[cfg(feature = "excel_export")]
9-
use ga4ghphetools::export::output_excel_comparison;
10-
11-
127
fn main() {
13-
let matches = Command::new("phetools")
8+
let mut cmd = Command::new("phetools")
149
.about("GA4GH Phenopacket Schema Curation Library Demo")
15-
.version(env!("CARGO_PKG_VERSION"))
16-
.subcommand(
17-
Command::new("excel")
18-
.about("Test loading of legacy Excel template")
19-
.arg(Arg::new("template").short('t').long("template").required(true))
20-
.arg(Arg::new("hpo").short('o').long("hpo").required(true))
21-
)
22-
.subcommand(
23-
Command::new("json")
24-
.about("Test loading of new JSON template")
25-
.arg(Arg::new("json").short('i').long("input"))
26-
.arg(Arg::new("hpo").short('o').long("hpo").required(true))
27-
)
28-
.subcommand(
29-
Command::new("etl")
30-
.about("Test converting an EtlDto to CohortData")
31-
.arg(Arg::new("input").short('i').long("input").required(true))
32-
.arg(Arg::new("hpo").short('o').long("hpo").required(true))
33-
)
34-
.subcommand(
35-
Command::new("version")
36-
.about("Show library version")
37-
.arg(Arg::new("version").short('v').long("version"))
38-
)
39-
.subcommand(
40-
Command::new("excel")
41-
.about("Compare two cohorts and export to Excel")
42-
.arg(Arg::new("cohort1").long("cohort1").required(true))
43-
.arg(Arg::new("cohort2").long("cohort2").required(true))
44-
.arg(Arg::new("output").long("output").required(true))
45-
.arg(Arg::new("hpo").long("hpo").required(true))
46-
.arg(
47-
Arg::new("threshold")
48-
.long("threshold")
49-
.default_value("1"),
50-
)
51-
)
52-
.get_matches();
10+
.version(env!("CARGO_PKG_VERSION"))
11+
.subcommand(commands::excel::command())
12+
.subcommand(commands::etl::command())
13+
.subcommand(commands::compare::command())
14+
.subcommand(commands::json::command())
15+
.subcommand(commands::removeterm::command());
16+
17+
let matches = cmd.clone().get_matches();
18+
5319
match matches.subcommand() {
54-
Some(("excel", sub_matches)) => handle_excel(sub_matches).expect("Could not start excel command"),
55-
Some(("json", sub_matches)) => {
56-
let input = sub_matches.get_one::<String>("input").unwrap();
57-
println!("json: {}", input);
58-
},
59-
Some(("etl", sub_matches)) => handle_etl(sub_matches).expect("Could not start ETL command"),
60-
Some(("version", sub_matches)) => {
61-
println!("Version: {}", env!("CARGO_PKG_VERSION"));
62-
},
63-
Some(("compare", sub_matches)) => {
64-
#[cfg(feature = "excel_export")]
65-
handle_compare(sub_matches).expect("Excel comparison failed");
66-
67-
#[cfg(not(feature = "excel_export"))]
68-
eprintln!("This binary was built without the `excel_export` feature");
69-
}
70-
_ => println!("No subcommand was used"),
20+
Some(("excel", sub_matches)) => commands::excel::handle(sub_matches).unwrap(),
21+
Some(("etl", sub_matches)) => commands::etl::handle(sub_matches).unwrap(),
22+
Some(("compare", sub_matches)) => commands::compare::handle(sub_matches).unwrap(),
23+
Some(("json", sub_matches)) => commands::json::handle(sub_matches).unwrap(),
24+
Some(("remove-term", sub_matches)) => commands::removeterm::handle(sub_matches).unwrap(),
25+
_ => cmd.print_help().unwrap(),
7126
}
72-
73-
}
74-
75-
#[cfg(feature = "excel_export")]
76-
fn handle_compare(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
77-
let cohort_1 = sub_matches
78-
.get_one::<String>("cohort1")
79-
.expect("cohort1 is required");
80-
81-
let cohort_2 = sub_matches
82-
.get_one::<String>("cohort2")
83-
.expect("cohort2 is required");
84-
85-
let output = sub_matches
86-
.get_one::<String>("output")
87-
.expect("output is required");
88-
89-
let hpo_path = sub_matches
90-
.get_one::<String>("hpo")
91-
.expect("hpo is required");
92-
93-
let threshold: usize = sub_matches
94-
.get_one::<String>("threshold")
95-
.unwrap()
96-
.parse()?;
97-
98-
let hpo = load_hpo(hpo_path)?;
99-
100-
output_excel_comparison(
101-
cohort_1,
102-
cohort_2,
103-
output,
104-
hpo,
105-
threshold,
106-
)
107-
.map_err(|e| e.into())
108-
}
109-
110-
fn handle_excel(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
111-
let template = sub_matches
112-
.get_one::<String>("template")
113-
.expect("template argument is required");
114-
let hpo = sub_matches
115-
.get_one::<String>("hpo")
116-
.ok_or("Missing required --hpo argument")?;
117-
118-
let hpo_arc = load_hpo(hpo)?;
119-
test_load_template(hpo_arc, template);
120-
Ok(())
121-
}
122-
123-
fn handle_etl(sub_matches: &ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
124-
let input = sub_matches.get_one::<String>("input").unwrap();
125-
println!("ETL: {}", input);
126-
let hpo = sub_matches
127-
.get_one::<String>("hpo")
128-
.ok_or("Missing required --hpo argument")?;
129-
let hpo_arc = load_hpo(hpo)?;
130-
let contents = std::fs::read_to_string(input)
131-
.map_err(|e| format!("Failed to read file: {}", e)).unwrap();
132-
let dto: EtlDto = serde_json::from_str(&contents)
133-
.map_err(|e| format!("Failed to deserialize JSON: {}", e)).unwrap();
134-
135-
let cohort = ga4ghphetools::etl::get_cohort_data_from_etl_dto(hpo_arc.clone(), dto)?;
136-
let json = serde_json::to_string_pretty(&cohort).unwrap();
137-
println!("{}", json);
138-
Ok(())
13927
}
14028

141-
142-
fn load_hpo(json_path: &str) -> Result<Arc<FullCsrOntology>, Box<dyn std::error::Error>> {
29+
/// Load HPO JSON
30+
pub fn load_hpo(json_path: &str) -> Result<Arc<FullCsrOntology>, Box<dyn std::error::Error>> {
14331
let loader = OntologyLoaderBuilder::new().obographs_parser().build();
14432
let hpo: FullCsrOntology = loader.load_from_path(json_path)?;
14533
Ok(Arc::new(hpo))
14634
}
147-
148-
149-
150-
fn test_load_template(hpo: Arc<FullCsrOntology>, template: &str) {
151-
match ga4ghphetools::factory::load_pyphetools_excel_template(template, false, hpo,|p,q|{
152-
println!("{}/{} variants validated", p, q);}) {
153-
Ok(cohort_dto) => {
154-
println!("[INFO] No errors identified for {:?}\n\n\n", cohort_dto);
155-
}
156-
Err(e) => {
157-
println!("[ERROR] {:?}", e);
158-
return;
159-
}
160-
}
161-
}

src/dto/cohort_dto.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,5 +410,24 @@ impl CohortData {
410410
pub fn phenopackets_schema_version() -> String {
411411
return PHETOOLS_SCHEMA_VERSION.to_string()
412412
}
413+
414+
    /// Overwrite this cohort's stored schema version with the current
    /// `PHETOOLS_SCHEMA_VERSION`.
    pub fn set_to_latest_schema_version(&mut self) {
        self.phetools_schema_version = PHETOOLS_SCHEMA_VERSION.to_string();
    }
417+
418+
419+
pub fn remove_hpo_column(&self, tid: &str) -> Result<CohortData, String> {
420+
let mut cohort = self.clone();
421+
let idx = cohort
422+
.hpo_headers
423+
.iter()
424+
.position(|h| h.hpo_id == tid)
425+
.ok_or_else(|| format!("Could not find column that corresponds to {tid}"))?;
426+
cohort.hpo_headers.remove(idx);
427+
for row in cohort.rows.iter_mut() {
428+
row.hpo_data.remove(idx);
429+
}
430+
return Ok(cohort);
431+
}
413432

414433
}

0 commit comments

Comments
 (0)