jkmckenna
diff --git a/src/smftools/hmm/HMM.py b/src/smftools/hmm/HMM.py
Lines changed: 18 additions & 3 deletions
diff --git a/src/smftools/hmm/call_hmm_peaks.py b/src/smftools/hmm/call_hmm_peaks.py
Lines changed: 28 additions & 10 deletions
diff --git a/src/smftools/hmm/display_hmm.py b/src/smftools/hmm/display_hmm.py
Lines changed: 11 additions & 6 deletions
diff --git a/src/smftools/hmm/nucleosome_hmm_refinement.py b/src/smftools/hmm/nucleosome_hmm_refinement.py
Lines changed: 7 additions & 2 deletions
diff --git a/src/smftools/preprocessing/append_base_context.py b/src/smftools/preprocessing/append_base_context.py
Lines changed: 10 additions & 5 deletions
diff --git a/src/smftools/preprocessing/append_binary_layer_by_base_context.py b/src/smftools/preprocessing/append_binary_layer_by_base_context.py
Lines changed: 16 additions & 9 deletions
diff --git a/src/smftools/preprocessing/binarize_on_Youden.py b/src/smftools/preprocessing/binarize_on_Youden.py
Lines changed: 10 additions & 4 deletions
diff --git a/src/smftools/preprocessing/binary_layers_to_ohe.py b/src/smftools/preprocessing/binary_layers_to_ohe.py
Lines changed: 5 additions & 1 deletion
diff --git a/src/smftools/preprocessing/calculate_coverage.py b/src/smftools/preprocessing/calculate_coverage.py
Lines changed: 6 additions & 1 deletion
diff --git a/src/smftools/preprocessing/calculate_position_Youden.py b/src/smftools/preprocessing/calculate_position_Youden.py
Lines changed: 6 additions & 3 deletions
@@ -10,6 +10,9 @@
 import torch.nn as nn
 from scipy.sparse import issparse
 
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
 # =============================================================================
 # Registry / Factory
 # =============================================================================
@@ -1228,7 +1231,11 @@ def fit_em(
             self._normalize_emission()
 
             if verbose:
-                print(f"[SingleBernoulliHMM.fit] iter={it} ll_proxy={hist[-1]:.6f}")
+                logger.info(
+                    "[SingleBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
+                    it,
+                    hist[-1],
+                )
 
             if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
                 break
@@ -1450,7 +1457,11 @@ def fit_em(
             self._normalize_emission()
 
             if verbose:
-                print(f"[MultiBernoulliHMM.fit] iter={it} ll_proxy={hist[-1]:.6f}")
+                logger.info(
+                    "[MultiBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
+                    it,
+                    hist[-1],
+                )
 
             if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
                 break
@@ -1783,7 +1794,11 @@ def fit_em(
             self._normalize_trans_by_bin()
 
             if verbose:
-                print(f"[DistanceBinnedSingle.fit] iter={it} ll_proxy={hist[-1]:.6f}")
+                logger.info(
+                    "[DistanceBinnedSingle.fit] iter=%s ll_proxy=%.6f",
+                    it,
+                    hist[-1],
+                )
 
             if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
                 break
 
@@ -3,6 +3,10 @@
 from pathlib import Path
 from typing import Any, Dict, Optional, Sequence, Union
 
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
 
 def call_hmm_peaks(
     adata,
@@ -107,8 +111,10 @@ def call_hmm_peaks(
                 candidates = [feature_key]
 
             if not candidates:
-                print(
-                    f"[call_hmm_peaks] WARNING: no layers found matching '{feature_key}' in ref '{ref}'. Skipping."
+                logger.warning(
+                    "[call_hmm_peaks] No layers found matching '%s' in ref '%s'. Skipping.",
+                    feature_key,
+                    ref,
                 )
                 continue
 
@@ -121,17 +127,22 @@ def call_hmm_peaks(
 
             for layer_name in candidates:
                 if layer_name not in adata.layers:
-                    print(
-                        f"[call_hmm_peaks] WARNING: layer '{layer_name}' not in adata.layers; skipping."
+                    logger.warning(
+                        "[call_hmm_peaks] Layer '%s' not in adata.layers; skipping.",
+                        layer_name,
                     )
                     continue
 
                 # Dense layer data
                 L = adata.layers[layer_name]
                 L = L.toarray() if issparse(L) else np.asarray(L)
                 if L.shape != (adata.n_obs, adata.n_vars):
-                    print(
-                        f"[call_hmm_peaks] WARNING: layer '{layer_name}' has shape {L.shape}, expected ({adata.n_obs}, {adata.n_vars}); skipping."
+                    logger.warning(
+                        "[call_hmm_peaks] Layer '%s' has shape %s, expected (%s, %s); skipping.",
+                        layer_name,
+                        L.shape,
+                        adata.n_obs,
+                        adata.n_vars,
                     )
                     continue
 
@@ -154,7 +165,11 @@ def call_hmm_peaks(
                     peak_metric, prominence=peak_prom, distance=min_distance
                 )
                 if peak_indices.size == 0:
-                    print(f"[call_hmm_peaks] No peaks for layer '{layer_name}' in ref '{ref}'.")
+                    logger.info(
+                        "[call_hmm_peaks] No peaks for layer '%s' in ref '%s'.",
+                        layer_name,
+                        ref,
+                    )
                     continue
 
                 peak_centers = coordinates[peak_indices]
@@ -185,7 +200,7 @@ def call_hmm_peaks(
                     safe_layer = str(layer_name).replace("/", "_")
                     fname = output_dir / f"{tag}_{safe_layer}_{safe_ref}_peaks.png"
                     fig.savefig(fname, bbox_inches="tight", dpi=200)
-                    print(f"[call_hmm_peaks] Saved plot to {fname}")
+                    logger.info("[call_hmm_peaks] Saved plot to %s", fname)
                     plt.close(fig)
                 else:
                     fig.tight_layout()
@@ -285,8 +300,11 @@ def call_hmm_peaks(
                 else:
                     adata.var[any_col] = False
 
-                print(
-                    f"[call_hmm_peaks] Annotated {len(peak_centers)} peaks for layer '{layer_name}' in ref '{ref}'."
+                logger.info(
+                    "[call_hmm_peaks] Annotated %s peaks for layer '%s' in ref '%s'.",
+                    len(peak_centers),
+                    layer_name,
+                    ref,
                 )
 
     # global any-peak across all layers/refs
 
@@ -1,19 +1,24 @@
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+
 def display_hmm(hmm, state_labels=["Non-Methylated", "Methylated"], obs_labels=["0", "1"]):
     import torch
 
-    print("\n**HMM Model Overview**")
-    print(hmm)
+    logger.info("**HMM Model Overview**")
+    logger.info("%s", hmm)
 
-    print("\n**Transition Matrix**")
+    logger.info("**Transition Matrix**")
     transition_matrix = torch.exp(hmm.edges).detach().cpu().numpy()
     for i, row in enumerate(transition_matrix):
         label = state_labels[i] if state_labels else f"State {i}"
         formatted_row = ", ".join(f"{p:.6f}" for p in row)
-        print(f"{label}: [{formatted_row}]")
+        logger.info("%s: [%s]", label, formatted_row)
 
-    print("\n**Emission Probabilities**")
+    logger.info("**Emission Probabilities**")
     for i, dist in enumerate(hmm.distributions):
         label = state_labels[i] if state_labels else f"State {i}"
         probs = dist.probs.detach().cpu().numpy()
         formatted_emissions = {obs_labels[j]: probs[j] for j in range(len(probs))}
-        print(f"{label}: {formatted_emissions}")
+        logger.info("%s: %s", label, formatted_emissions)
@@ -1,3 +1,8 @@
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+
 def refine_nucleosome_calls(
     adata,
     layer_name,
@@ -71,7 +76,7 @@ def refine_nucleosome_calls(
     adata.layers[f"{layer_name}_hexamers"] = hexamer_layer
     adata.layers[f"{layer_name}_octamers"] = octamer_layer
 
-    print(f"Added layers: {layer_name}_hexamers and {layer_name}_octamers")
+    logger.info("Added layers: %s_hexamers and %s_octamers", layer_name, layer_name)
     return adata
 
 
@@ -154,5 +159,5 @@ def infer_nucleosomes_in_large_bound(
                         pos_cursor += 1
 
     adata.layers[f"{large_bound_layer}_phased_nucleosomes"] = inferred_layer
-    print(f"Added layer: {large_bound_layer}_phased_nucleosomes")
+    logger.info("Added layer: %s_phased_nucleosomes", large_bound_layer)
     return adata
@@ -1,3 +1,8 @@
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+
 def append_base_context(
     adata,
     ref_column="Reference_strand",
@@ -30,7 +35,7 @@ def append_base_context(
         # QC already performed; nothing to do
         return
 
-    print("Adding base context based on reference FASTA sequence for sample")
+    logger.info("Adding base context based on reference FASTA sequence for sample")
     references = adata.obs[ref_column].cat.categories
     site_types = []
 
@@ -95,8 +100,8 @@ def append_base_context(
                         elif sequence[i - 1] != "C" and sequence[i + 1] != "C":
                             boolean_dict[f"{ref}_other_C_site"][i] = True
             else:
-                print(
-                    "Error: top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
+                logger.error(
+                    "Top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
                 )
 
         if "A" in mod_target_bases:
@@ -111,8 +116,8 @@ def append_base_context(
                     if sequence[i] == "T":
                         boolean_dict[f"{ref}_A_site"][i] = True
             else:
-                print(
-                    "Error: top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
+                logger.error(
+                    "Top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
                 )
 
         for site_type in site_types:
 
@@ -1,6 +1,10 @@
 import numpy as np
 import scipy.sparse as sp
 
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
 
 def append_binary_layer_by_base_context(
     adata,
@@ -65,7 +69,10 @@ def append_binary_layer_by_base_context(
     def _col_mask_or_warn(colname):
         if colname not in adata.var.columns:
             if verbose:
-                print(f"Warning: var column '{colname}' not found; treating as all-False mask.")
+                logger.warning(
+                    "Var column '%s' not found; treating as all-False mask.",
+                    colname,
+                )
             return np.zeros(n_vars, dtype=bool)
         vals = adata.var[colname].values
         # coerce truthiness
@@ -86,7 +93,7 @@ def _col_mask_or_warn(colname):
     X = adata.X
     if sp.issparse(X):
         if verbose:
-            print("Converting sparse X to dense array for layer construction (temporary).")
+            logger.info("Converting sparse X to dense array for layer construction (temporary).")
         X = X.toarray()
     X = np.asarray(X, dtype=np.float32)
 
@@ -149,13 +156,13 @@ def _col_mask_or_warn(colname):
         def _filled_positions(arr):
             return int(np.sum(~np.isnan(arr)))
 
-        print("Layer build summary (non-NaN cell counts):")
-        print(f"  GpC: {_filled_positions(masked_gpc)}")
-        print(f"  CpG: {_filled_positions(masked_cpg)}")
-        print(f"  GpC+CpG combined: {_filled_positions(combined_sum)}")
-        print(f"  C: {_filled_positions(masked_any_c)}")
-        print(f"  other_C: {_filled_positions(masked_other_c)}")
-        print(f"  A: {_filled_positions(masked_a)}")
+        logger.info("Layer build summary (non-NaN cell counts):")
+        logger.info("  GpC: %s", _filled_positions(masked_gpc))
+        logger.info("  CpG: %s", _filled_positions(masked_cpg))
+        logger.info("  GpC+CpG combined: %s", _filled_positions(combined_sum))
+        logger.info("  C: %s", _filled_positions(masked_any_c))
+        logger.info("  other_C: %s", _filled_positions(masked_other_c))
+        logger.info("  A: %s", _filled_positions(masked_a))
 
     # mark as done
     adata.uns[uns_flag] = True
 
@@ -1,3 +1,8 @@
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+
 def binarize_on_Youden(
     adata,
     ref_column: str = "Reference_strand",
@@ -40,7 +45,7 @@ def binarize_on_Youden(
     ref_labels = adata.obs[ref_column].to_numpy()
 
     for ref in references:
-        print(f"Binarizing on Youden statistics for {ref}")
+        logger.info("Binarizing on Youden statistics for %s", ref)
 
         ref_mask = ref_labels == ref
         if not np.any(ref_mask):
@@ -84,9 +89,10 @@ def binarize_on_Youden(
         binarized[ref_mask, :] = block_out
 
     adata.layers[output_layer_name] = binarized
-    print(
-        f"Finished binarization → stored in adata.layers['{output_layer_name}'] "
-        f"(mask_failed_positions={mask_failed_positions})"
+    logger.info(
+        "Finished binarization → stored in adata.layers['%s'] (mask_failed_positions=%s)",
+        output_layer_name,
+        mask_failed_positions,
     )
 
 
 
@@ -1,5 +1,9 @@
 ## binary_layers_to_ohe
 
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
 ## Conversion SMF Specific
 def binary_layers_to_ohe(adata, binary_layers, stack="hstack"):
     """
@@ -23,7 +27,7 @@ def binary_layers_to_ohe(adata, binary_layers, stack="hstack"):
     N_binary_layer = [layer for layer in binary_layers if layer == "N_binary_encoding"]
     # Add the N_binary_encoding layer to the end of the list of binary layers
     all_binary_layers = ACGT_binary_layers + N_binary_layer
-    print(f"Found {all_binary_layers} layers in adata")
+    logger.info("Found %s layers in adata", all_binary_layers)
 
     # Extract the layers
     layers = [adata.layers[layer_name] for layer_name in all_binary_layers]
 
@@ -1,3 +1,8 @@
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+
 def calculate_coverage(
     adata,
     ref_column="Reference_strand",
@@ -34,7 +39,7 @@ def calculate_coverage(
 
     # Loop over references
     for ref in references:
-        print(f"Assessing positional coverage across samples for {ref} reference")
+        logger.info("Assessing positional coverage across samples for %s reference", ref)
 
         # Subset to current category
         ref_mask = adata.obs[ref_column] == ref
 
@@ -1,5 +1,8 @@
 ## calculate_position_Youden
 ## Calculating and applying position level thresholds for methylation calls to binarize the SMF data
+from smftools.logging_utils import get_logger
+
+logger = get_logger(__name__)
 def calculate_position_Youden(
     adata,
     positive_control_sample=None,
@@ -37,7 +40,7 @@ def calculate_position_Youden(
     references = adata.obs[ref_column].cat.categories
     # Iterate over each category in the specified obs_column
     for ref in references:
-        print(f"Calculating position Youden statistics for {ref}")
+        logger.info("Calculating position Youden statistics for %s", ref)
         # Subset to keep only reads associated with the category
         ref_subset = adata[adata.obs[ref_column] == ref]
         # Iterate over positive and negative control samples
@@ -58,7 +61,7 @@ def calculate_position_Youden(
                     threshold = np.percentile(sorted_column, infer_on_percentile)
                     control_subset = ref_subset[ref_subset.obs[inference_variable] <= threshold, :]
             elif not infer_on_percentile and not control:
-                print(
+                logger.error(
                     "Can not threshold Anndata on Youden threshold. Need to either provide control samples or set infer_on_percentile to True"
                 )
                 return
@@ -152,4 +155,4 @@ def calculate_position_Youden(
             True if i > J_threshold else False for i in J_max_list
         ]
 
-    print("Finished calculating position Youden statistics")
+    logger.info("Finished calculating position Youden statistics")