P2GX · psnairne · Mar 10, 2026 · SmartMonkey-git · Mar 12, 2026 · SmartMonkey-git
diff --git a/phenoxtract/src/transform/strategies/age_to_iso8601.rs b/phenoxtract/src/transform/strategies/age_to_iso8601.rs
@@ -13,14 +13,36 @@ use std::any::type_name;
 use std::collections::{HashMap, HashSet};
 
 #[derive(Debug)]
+/// # Description
+///
 /// Given a column whose cells contains ages (e.g. subject age, age of death, age of onset)
-/// Given a column whose cells contains ages (e.g. subject age, age of death, age of onset)
+/// Given a column whose cells contain ages (e.g. [`Context::SubjectAge`], [`Context::AgeOfDeath`], [`Context::AgeOfOnset`]),
-/// Given a column whose cells contains ages (e.g. subject age, age of death, age of onset)
+/// Given a column whose cells contain ages (e.g. [`Context::SubjectAge`], [`Context::AgeOfDeath`], [`Context::AgeOfOnset`]),
 /// this strategy converts integer entries to ISO8601 durations: 47 -> P47Y
+///
 /// NOTE: the integers must be between 0 and 150.
-/// NOTE: the integers must be between 0 and 150.
+/// ## Note
+/// Integers must be between 0 and 150.
-/// NOTE: the integers must be between 0 and 150.
+/// ## Note
+/// Integers must be between 0 and 150.
 ///
 /// If an entry is already in ISO8601 duration format, it will be left unchanged.
 ///
-/// If there are cell values which are neither ISO8601 durations nor integers
+/// # Example
+///
+/// The table
+/// ```text
-/// ```text
+/// ```csv
-/// ```text
+/// ```csv
+/// PatientId, age_at_last_encounter
+/// P001, 47
+/// P002, P56Y12M3D
+/// ```
+///
+/// will be mapped to
+/// ```text
+/// PatientId, age_at_last_encounter
+/// P001, P47Y
+/// P002, P56Y12M3D
+/// ```
+///
+/// # Errors
+///
+/// If there are cell values which are neither ISO8601 durations nor integers (between 0 and 150)
 /// an error will be returned.
+///
 pub struct AgeToIso8601Strategy {
     min_age: i32,
     max_age: i32,

diff --git a/phenoxtract/src/transform/strategies/alias_map.rs b/phenoxtract/src/transform/strategies/alias_map.rs
@@ -8,15 +8,47 @@ use polars::datatypes::{DataType, PlSmallStr};
 use polars::prelude::{ChunkApply, Column};
 use std::borrow::Cow;
 
-/// Given a collection of contextualised dataframes, this strategy will apply all the aliases
-/// found in the SeriesContexts.
-/// For example if a Contextualised Dataframe has a SeriesContext consisting of a SubjectSex column
-/// and a ToString AliasMap which converts "M" to "Male" and "F" to "Female"
+/// # Description
+///
+/// Given a collection of `ContextualiseDataframes`, this strategy will apply all the aliases
-/// Given a collection of `ContextualiseDataframes`, this strategy will apply all the aliases
+/// Given a collection of [`ContextualizedDataFrame`]s, this strategy will apply all the aliases
-/// Given a collection of `ContextualiseDataframes`, this strategy will apply all the aliases
+/// Given a collection of [`ContextualizedDataFrame`]s, this strategy will apply all the aliases
+/// found in the `SeriesContexts`.
+///
+/// For example, if a `ContextualizedDataFrame` has a `SeriesContext` consisting of a `subject_sex` column
+/// and a `ToString` `AliasMap`, which converts "M" to "Male" and "F" to "Female",
 /// then the strategy will apply those aliases to each cell.
+///
 /// # NOTE
 /// - This does not transform the headers of the Dataframe.
 /// - Only non-null cells may be aliased.
 /// - Non-null cells may be aliased to null.
+///
+/// # Example
+///
+/// If the `SeriesContext` for the `age_at_last_encounter` column
+/// has a `ToInt` `AliasMap` with a single alias `Less than 1 year` -> `0` then the table
-/// has a `ToInt` `AliasMap` with a single alias `Less than 1 year` -> `0` then the table
+/// has a `ToInt` `AliasMap` with a single alias `Less than 1 year` -> `0` then the table:
-/// has a `ToInt` `AliasMap` with a single alias `Less than 1 year` -> `0` then the table
+/// has a `ToInt` `AliasMap` with a single alias `Less than 1 year` -> `0` then the table:
+///
+/// ```text
+/// PatientId, age_at_last_encounter
+/// P001, 4
+/// P002, Less than 1 year
+/// ```
+///
+/// is mapped to
+/// ```text
+/// PatientId, age_at_last_encounter
+/// P001, 4
+/// P002, 0
+/// ```
+///
+/// # Errors
+///
+/// Errors will be thrown if:
+/// - Any columns to be aliased cannot be cast to String datatype.
+/// - Once the aliases have been applied,
+///   the column cannot be cast to the desired `OutputDataType` of the `AliasMap`.
+///   For example if it is a `ToInt` `AliasMap`, yet there remain strings in the column after
+///   the aliases have been applied.
+///
 #[derive(Debug)]
 pub struct AliasMapStrategy;
 

diff --git a/phenoxtract/src/transform/strategies/date_to_age.rs b/phenoxtract/src/transform/strategies/date_to_age.rs
@@ -21,12 +21,42 @@ use std::collections::{HashMap, HashSet};
 
 #[allow(dead_code)]
 #[derive(Debug, Default)]
+/// # Description
+///
 /// This strategy finds columns whose cells contain dates, and converts these dates
 /// to a certain age of the patient, by leveraging the patient's date of birth.
 ///
 /// If there is no data on a certain patient's date of birth,
 /// yet there is a date corresponding to this patient,
 /// then an error will be thrown.
+///
+/// # Example
+///
+/// The table
+///
+/// ```text
+/// PatientId, DOB, TimeAtLastEncounter
+/// P001, 1990, 1995
+/// P002, 1992,
+/// P003, 2000, 2004
+/// P004,,
+/// ```
+///
+/// is mapped to
+/// ```text
+/// PatientId, DOB, TimeAtLastEncounter
+/// P001, 1990, 5
+/// P002, 1992,
+/// P003, 2000, 4
+/// P004,,
+/// ```
+///
+/// # Errors
+///
+/// An error will be thrown if
+/// 1. A DOB is before to a date for a patient, leading to a negative age.
-/// 1. A DOB is before to a date for a patient, leading to a negative age.
+/// 1. A date for a patient is prior to their date of birth, leading to a negative age.
-/// 1. A DOB is before to a date for a patient, leading to a negative age.
+/// 1. A date for a patient is prior to their date of birth, leading to a negative age.
+/// 2. There exists a date which cannot be converted to an age due to missing DOB data.
+///
 pub struct DateToAgeStrategy;
 
 impl Strategy for DateToAgeStrategy {

diff --git a/phenoxtract/src/transform/strategies/hpo_disease_splitter.rs b/phenoxtract/src/transform/strategies/hpo_disease_splitter.rs
@@ -12,24 +12,54 @@ use std::any::type_name;
 use std::collections::HashSet;
 use std::sync::Arc;
 
+/// # Description
+///
 /// This strategy will find every column whose context is HpoOrDisease
 /// and split it into two separate columns: an Hpo column and a disease column.
 ///
 /// Hpo is prioritised: the strategy will find all Hpo labels and IDs, and then put them into the
 /// Hpo column. All other cells will be assumed to refer to disease.
 ///
+/// # Fields
+///
+/// * `hpo_bidict_lib` - This should contain the `BiDictLibrary` for the version of HPO that you want to use.
+/// * `disease_bidict_lib` - All non-HPO cells will be processed by this disease BiDictLibrary.
+///
+/// # Example
+///
+/// The table
+///
+/// ```text
+/// PatientId, conditions
+/// P001, HP:1234567
+/// P002, Arachnodactyly
+/// P003, Marfan Syndrome
+/// ```
+/// is mapped to
+///
+/// ```text
+/// PatientId, conditions_hpo, conditions_disease
+/// P001, HP:1234567,
+/// P002, Arachnodactyly,
+/// P003,,Marfan Syndrome
+/// ```
+///
+/// # Errors
+///
+/// A `TransformError::MappingError` will be thrown if any cells in the HpoOrDisease column
+/// are not a label or ID in either the `hpo_bidict_lib` or the `disease_bidict_lib`.
 #[derive(Debug)]
 pub struct HpoDiseaseSplitterStrategy {
-    hpo_dict_lib: Arc<BiDictLibrary>,
-    disease_dict_lib: Arc<BiDictLibrary>,
+    hpo_bidict_lib: Arc<BiDictLibrary>,
+    disease_bidict_lib: Arc<BiDictLibrary>,
 }
 
 impl HpoDiseaseSplitterStrategy {
     #[allow(unused)]
-    pub fn new(hpo_dict_lib: Arc<BiDictLibrary>, disease_dict_lib: Arc<BiDictLibrary>) -> Self {
+    pub fn new(hpo_bidict_lib: Arc<BiDictLibrary>, disease_bidict_lib: Arc<BiDictLibrary>) -> Self {
         Self {
-            hpo_dict_lib,
-            disease_dict_lib,
+            hpo_bidict_lib,
+            disease_bidict_lib,
         }
     }
 }
@@ -67,10 +97,10 @@ impl Strategy for HpoDiseaseSplitterStrategy {
                 for hpo_or_disease_opt in hpo_or_disease_col.str()?.iter() {
                     match hpo_or_disease_opt {
                         Some(hpo_or_disease) => {
-                            if self.hpo_dict_lib.lookup(hpo_or_disease).is_some() {
+                            if self.hpo_bidict_lib.lookup(hpo_or_disease).is_some() {
                                 new_hpo_col_data.push(AnyValue::String(hpo_or_disease));
                                 new_disease_col_data.push(AnyValue::Null);
-                            } else if self.disease_dict_lib.lookup(hpo_or_disease).is_some() {
+                            } else if self.disease_bidict_lib.lookup(hpo_or_disease).is_some() {
                                 new_hpo_col_data.push(AnyValue::Null);
                                 new_disease_col_data.push(AnyValue::String(hpo_or_disease))
                             } else {
@@ -171,8 +201,8 @@ mod tests {
             .unwrap();
 
         let strategy = HpoDiseaseSplitterStrategy {
-            hpo_dict_lib: Arc::new(BiDictLibrary::new("hpo", vec![Box::new(HPO_DICT.clone())])),
-            disease_dict_lib: Arc::new(BiDictLibrary::new(
+            hpo_bidict_lib: Arc::new(BiDictLibrary::new("hpo", vec![Box::new(HPO_DICT.clone())])),
+            disease_bidict_lib: Arc::new(BiDictLibrary::new(
                 "disease",
                 vec![Box::new(MONDO_BIDICT.clone())],
             )),

diff --git a/phenoxtract/src/transform/strategies/mapping.rs b/phenoxtract/src/transform/strategies/mapping.rs
@@ -23,6 +23,8 @@ pub enum DefaultMapping {
     SexMapping,
 }
 
+/// # Description
+///
 /// A strategy for mapping string values to standardized terms using a synonym dictionary.
 ///
 /// `MappingStrategy` transforms data by replacing cell values with their corresponding
@@ -48,7 +50,7 @@ pub enum DefaultMapping {
 /// # Errors
 ///
 /// Returns `TransformError::MappingError` if any values in the data cannot be found
-/// in the synonym map, providing details about unmapped values and suggestions.
+/// in the synonym map. The error will provide details about unmapped values and make suggestions.
 #[derive(Debug)]
 pub struct MappingStrategy {
     synonym_map: HashMap<String, String>,

diff --git a/phenoxtract/src/transform/strategies/multi_hpo_col_expansion.rs b/phenoxtract/src/transform/strategies/multi_hpo_col_expansion.rs
@@ -11,7 +11,9 @@ use ordermap::{OrderMap, OrderSet};
 use polars::prelude::{AnyValue, Column, DataType, StringChunked};
 use regex::Regex;
 
-/// A strategy for converting columns whose cells contain HPO IDs
+/// # Description
+///
+/// A strategy for converting columns whose cells contain multiple HPO IDs
 /// into several columns whose headers are exactly those HPO IDs
 /// and whose cells contain the ObservationStatus for each patient.
 ///
@@ -21,8 +23,28 @@ use regex::Regex;
 /// A new SeriesContext will be added for each block of new columns.
 ///
 /// The old columns and contexts will be removed.
+///
+/// # Example
+///
+/// Data of the format
+///
+/// ```text
+/// PatientId, MultiHpo
+/// P001,HP:1111111 and HP:2222222
+/// P002,HP:2222222
+/// ```
+/// will be mapped to
+/// ```text
+/// PatientId,HP:1111111,HP:2222222
+/// P001,true,true
+/// P002,,true
+/// ```
+/// # Errors
+///
+/// An error will occur if there are MultiHpoId columns which do not have String datatype.
 #[derive(Debug)]
 pub struct MultiHPOColExpansionStrategy;
+
 impl Strategy for MultiHPOColExpansionStrategy {
     fn is_valid(&self, tables: &[&mut ContextualizedDataFrame]) -> bool {
         tables.iter().any(|table| {

diff --git a/phenoxtract/src/transform/strategies/ontology_normaliser.rs b/phenoxtract/src/transform/strategies/ontology_normaliser.rs
@@ -15,28 +15,49 @@ use std::collections::HashSet;
 use std::sync::Arc;
 
 #[derive(Debug)]
+/// # Description
+///
 /// A strategy that converts ontology labels in cells (or synonyms of them) to the corresponding IDs.
 /// It is case-insensitive.
 ///
 /// This strategy processes string columns in data tables by looking up values in an ontology
 /// bidirectional dictionary and replacing labels with their corresponding IDs.
 /// It only operates on columns that have no header context and match the specified data context.
 ///
+/// When applied to tables, the strategy:
+/// 1. Identifies string columns with no header context that match the data context
+/// 2. For each cell value, attempts to map it via the ontology dictionary to its ID.
+/// 3. Replaces the original value with the ID
+/// 4. Collects mapping errors for any values that couldn't be resolved
+/// 5. Returns an error if any labels failed to map (except for null values)
+///
 /// # Fields
 ///
 /// * `ontology_dict` - A thread-safe reference to a bidirectional ontology dictionary that
 ///   maps between HPO labels and their primary identifiers. E.g. the HPO bidirectional dictionary
 /// * `data_context` - The specific data context that columns must match to be processed
 ///   by this strategy. E.g. HpoLabelOrId
 ///
-/// # Behavior
+/// # Example
+///
+/// If `OntologyNormaliser` is applied with `ContextKind = Hpo`, then
+///
+/// ```text
+/// PatientId, Hpo
+/// P001, Pneumonia
+/// P002, HP:1234567
+/// ```
+/// is mapped to
+/// ```text
+/// PatientId, Hpo
+/// P001, HP:0002090
+/// P002, HP:1234567
+/// ```
+///
+/// # Errors
+///
+/// Returns `TransformError::MappingError` if any cell values do not match an ontology ID or label.
 ///
-/// When applied to tables, the strategy:
-/// 1. Identifies string columns with no header context that match the data context
-/// 2. For each cell value, attempts to maps it via the ontology dictionary to its ID.
-/// 3. Replaces the original value with the ID
-/// 4. Collects mapping errors for any values that couldn't be resolved
-/// 5. Returns an error if any labels failed to map (except for null values)
 pub struct OntologyNormaliserStrategy {
     ontology_dict: Arc<OntologyBiDict>,
     data_context_kind: ContextKind,