Skip to content

Commit 2c226d9

Browse files
committed
fixing minimum lines for excel import
1 parent 027dced commit 2c226d9

File tree

3 files changed

+24
-14
lines changed

3 files changed

+24
-14
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ga4ghphetools"
3-
version = "0.5.27"
3+
version = "0.5.29"
44
edition = "2021"
55
keywords = ["GA4GH", "Phenopacket Schema", "Human Phenotype Ontology"]
66
description = "Generate GA4GH phenopackets from tabular data"

src/factory/excel.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,26 +72,24 @@ fn get_list_of_rows_from_excel(file_path: &str) -> Result<Vec<Vec<String>>, Stri
7272
}
7373

7474

75-
/// Reads an Excel file in the **legacy phenopacket-store format**
76-
/// and validates it into a structured 2D string matrix (`Vec<Vec<String>>`).
75+
/// Reads an Excel file and validates it into
76+
/// a structured 2D string matrix (`Vec<Vec<String>>`).
7777
///
7878
/// This function is specialized for early phenopacket-store input files,
7979
/// which are expected to have:
80-
/// - At least **3 rows** total (2 header rows + data).
81-
/// - The **first two rows** (`row0`, `row1`) defining the column headers.
82-
/// - Each data row having the **same number of fields** as the headers.
80+
/// - At least **2 rows** total (1 header row + data).
81+
/// - Each data row having the **same number of fields** as the header.
8382
///
8483
/// # Behavior
8584
/// - Delegates to [`get_list_of_rows_from_excel`] to extract raw cell values.
86-
/// - Ensures the matrix has at least 3 rows; otherwise returns an error.
87-
/// - Ensures the first two rows have the same number of fields (consistent headers).
88-
/// - Ensures each subsequent row has the same number of fields as the headers.
85+
/// - Ensures the matrix has at least 2 rows; otherwise returns an error.
86+
/// - Ensures each subsequent row has the same number of fields as the header.
8987
/// - Replaces any **empty cells** (only in data rows) with the literal `"na"`.
9088
///
9189
/// # Errors
9290
/// Returns `Err(String)` in the following cases:
9391
/// - The Excel file cannot be opened or parsed (from [`get_list_of_rows_from_excel`]).
94-
/// - The file has fewer than 3 rows.
92+
/// - The file has fewer than 2 rows.
9593
/// - The two header rows have different numbers of fields.
9694
/// - Any data row has a different number of fields than the header.
9795
///

src/ppkt/ppkt_exporter.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,14 +392,14 @@ mod tests {
392392
});
393393

394394
PpktExporter::strip_phenopacket_defaults(&mut packet);
395-
//println!("{}", packet);
395+
println!("{}", packet);
396396

397397
assert!(! packet["subject"]["vitalStatus"].get("survivalTimeInDays").is_some());
398398
assert_eq!(packet["subject"]["vitalStatus"]["status"], "DECEASED");
399399
}
400400

401401

402-
#[test]
402+
#[test]
403403
fn test_strip_removes_unknown_karyotypic_sex_string2() {
404404
let mut packet = json!({
405405
"subject": {
@@ -412,9 +412,21 @@ mod tests {
412412
}
413413
}
414414
});
415-
PpktExporter::strip_phenopacket_defaults(&mut packet);
415+
PpktExporter::strip_phenopacket_defaults(&mut packet);
416416
assert!(!packet["subject"].get("karyotypicSex").is_some());
417-
}
417+
}
418+
419+
#[test]
420+
fn test_do_not_add_unknown_sex() {
421+
let mut packet = json!({
422+
"subject": {
423+
"id": "PMID_29198722_p_Arg913Ter_Affected_Individual_1",
424+
}
425+
});
426+
PpktExporter::strip_phenopacket_defaults(&mut packet);
427+
println!("{:?}", packet);
428+
assert!(!packet["subject"].get("sex").is_some());
429+
}
418430

419431

420432

0 commit comments

Comments
 (0)