Skip to content

Commit 59db2b5

Browse files
authored
Merge pull request #180 from jkmckenna/codex/add-logging-strategy-to-subpackages
Codex-generated pull request
2 parents fb13ef5 + cc8f1f9 commit 59db2b5

25 files changed

+308
-127
lines changed

src/smftools/hmm/HMM.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
import torch.nn as nn
1111
from scipy.sparse import issparse
1212

13+
from smftools.logging_utils import get_logger
14+
15+
logger = get_logger(__name__)
1316
# =============================================================================
1417
# Registry / Factory
1518
# =============================================================================
@@ -1228,7 +1231,11 @@ def fit_em(
12281231
self._normalize_emission()
12291232

12301233
if verbose:
1231-
print(f"[SingleBernoulliHMM.fit] iter={it} ll_proxy={hist[-1]:.6f}")
1234+
logger.info(
1235+
"[SingleBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
1236+
it,
1237+
hist[-1],
1238+
)
12321239

12331240
if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
12341241
break
@@ -1450,7 +1457,11 @@ def fit_em(
14501457
self._normalize_emission()
14511458

14521459
if verbose:
1453-
print(f"[MultiBernoulliHMM.fit] iter={it} ll_proxy={hist[-1]:.6f}")
1460+
logger.info(
1461+
"[MultiBernoulliHMM.fit] iter=%s ll_proxy=%.6f",
1462+
it,
1463+
hist[-1],
1464+
)
14541465

14551466
if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
14561467
break
@@ -1783,7 +1794,11 @@ def fit_em(
17831794
self._normalize_trans_by_bin()
17841795

17851796
if verbose:
1786-
print(f"[DistanceBinnedSingle.fit] iter={it} ll_proxy={hist[-1]:.6f}")
1797+
logger.info(
1798+
"[DistanceBinnedSingle.fit] iter=%s ll_proxy=%.6f",
1799+
it,
1800+
hist[-1],
1801+
)
17871802

17881803
if len(hist) > 1 and abs(hist[-1] - hist[-2]) < float(tol):
17891804
break

src/smftools/hmm/call_hmm_peaks.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
from pathlib import Path
44
from typing import Any, Dict, Optional, Sequence, Union
55

6+
from smftools.logging_utils import get_logger
7+
8+
logger = get_logger(__name__)
9+
610

711
def call_hmm_peaks(
812
adata,
@@ -107,8 +111,10 @@ def call_hmm_peaks(
107111
candidates = [feature_key]
108112

109113
if not candidates:
110-
print(
111-
f"[call_hmm_peaks] WARNING: no layers found matching '{feature_key}' in ref '{ref}'. Skipping."
114+
logger.warning(
115+
"[call_hmm_peaks] No layers found matching '%s' in ref '%s'. Skipping.",
116+
feature_key,
117+
ref,
112118
)
113119
continue
114120

@@ -121,17 +127,22 @@ def call_hmm_peaks(
121127

122128
for layer_name in candidates:
123129
if layer_name not in adata.layers:
124-
print(
125-
f"[call_hmm_peaks] WARNING: layer '{layer_name}' not in adata.layers; skipping."
130+
logger.warning(
131+
"[call_hmm_peaks] Layer '%s' not in adata.layers; skipping.",
132+
layer_name,
126133
)
127134
continue
128135

129136
# Dense layer data
130137
L = adata.layers[layer_name]
131138
L = L.toarray() if issparse(L) else np.asarray(L)
132139
if L.shape != (adata.n_obs, adata.n_vars):
133-
print(
134-
f"[call_hmm_peaks] WARNING: layer '{layer_name}' has shape {L.shape}, expected ({adata.n_obs}, {adata.n_vars}); skipping."
140+
logger.warning(
141+
"[call_hmm_peaks] Layer '%s' has shape %s, expected (%s, %s); skipping.",
142+
layer_name,
143+
L.shape,
144+
adata.n_obs,
145+
adata.n_vars,
135146
)
136147
continue
137148

@@ -154,7 +165,11 @@ def call_hmm_peaks(
154165
peak_metric, prominence=peak_prom, distance=min_distance
155166
)
156167
if peak_indices.size == 0:
157-
print(f"[call_hmm_peaks] No peaks for layer '{layer_name}' in ref '{ref}'.")
168+
logger.info(
169+
"[call_hmm_peaks] No peaks for layer '%s' in ref '%s'.",
170+
layer_name,
171+
ref,
172+
)
158173
continue
159174

160175
peak_centers = coordinates[peak_indices]
@@ -185,7 +200,7 @@ def call_hmm_peaks(
185200
safe_layer = str(layer_name).replace("/", "_")
186201
fname = output_dir / f"{tag}_{safe_layer}_{safe_ref}_peaks.png"
187202
fig.savefig(fname, bbox_inches="tight", dpi=200)
188-
print(f"[call_hmm_peaks] Saved plot to {fname}")
203+
logger.info("[call_hmm_peaks] Saved plot to %s", fname)
189204
plt.close(fig)
190205
else:
191206
fig.tight_layout()
@@ -285,8 +300,11 @@ def call_hmm_peaks(
285300
else:
286301
adata.var[any_col] = False
287302

288-
print(
289-
f"[call_hmm_peaks] Annotated {len(peak_centers)} peaks for layer '{layer_name}' in ref '{ref}'."
303+
logger.info(
304+
"[call_hmm_peaks] Annotated %s peaks for layer '%s' in ref '%s'.",
305+
len(peak_centers),
306+
layer_name,
307+
ref,
290308
)
291309

292310
# global any-peak across all layers/refs

src/smftools/hmm/display_hmm.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
1+
from smftools.logging_utils import get_logger
2+
3+
logger = get_logger(__name__)
4+
5+
16
def display_hmm(hmm, state_labels=["Non-Methylated", "Methylated"], obs_labels=["0", "1"]):
27
import torch
38

4-
print("\n**HMM Model Overview**")
5-
print(hmm)
9+
logger.info("**HMM Model Overview**")
10+
logger.info("%s", hmm)
611

7-
print("\n**Transition Matrix**")
12+
logger.info("**Transition Matrix**")
813
transition_matrix = torch.exp(hmm.edges).detach().cpu().numpy()
914
for i, row in enumerate(transition_matrix):
1015
label = state_labels[i] if state_labels else f"State {i}"
1116
formatted_row = ", ".join(f"{p:.6f}" for p in row)
12-
print(f"{label}: [{formatted_row}]")
17+
logger.info("%s: [%s]", label, formatted_row)
1318

14-
print("\n**Emission Probabilities**")
19+
logger.info("**Emission Probabilities**")
1520
for i, dist in enumerate(hmm.distributions):
1621
label = state_labels[i] if state_labels else f"State {i}"
1722
probs = dist.probs.detach().cpu().numpy()
1823
formatted_emissions = {obs_labels[j]: probs[j] for j in range(len(probs))}
19-
print(f"{label}: {formatted_emissions}")
24+
logger.info("%s: %s", label, formatted_emissions)

src/smftools/hmm/nucleosome_hmm_refinement.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from smftools.logging_utils import get_logger
2+
3+
logger = get_logger(__name__)
4+
5+
16
def refine_nucleosome_calls(
27
adata,
38
layer_name,
@@ -71,7 +76,7 @@ def refine_nucleosome_calls(
7176
adata.layers[f"{layer_name}_hexamers"] = hexamer_layer
7277
adata.layers[f"{layer_name}_octamers"] = octamer_layer
7378

74-
print(f"Added layers: {layer_name}_hexamers and {layer_name}_octamers")
79+
logger.info("Added layers: %s_hexamers and %s_octamers", layer_name, layer_name)
7580
return adata
7681

7782

@@ -154,5 +159,5 @@ def infer_nucleosomes_in_large_bound(
154159
pos_cursor += 1
155160

156161
adata.layers[f"{large_bound_layer}_phased_nucleosomes"] = inferred_layer
157-
print(f"Added layer: {large_bound_layer}_phased_nucleosomes")
162+
logger.info("Added layer: %s_phased_nucleosomes", large_bound_layer)
158163
return adata

src/smftools/preprocessing/append_base_context.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from smftools.logging_utils import get_logger
2+
3+
logger = get_logger(__name__)
4+
5+
16
def append_base_context(
27
adata,
38
ref_column="Reference_strand",
@@ -30,7 +35,7 @@ def append_base_context(
3035
# QC already performed; nothing to do
3136
return
3237

33-
print("Adding base context based on reference FASTA sequence for sample")
38+
logger.info("Adding base context based on reference FASTA sequence for sample")
3439
references = adata.obs[ref_column].cat.categories
3540
site_types = []
3641

@@ -95,8 +100,8 @@ def append_base_context(
95100
elif sequence[i - 1] != "C" and sequence[i + 1] != "C":
96101
boolean_dict[f"{ref}_other_C_site"][i] = True
97102
else:
98-
print(
99-
"Error: top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
103+
logger.error(
104+
"Top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
100105
)
101106

102107
if "A" in mod_target_bases:
@@ -111,8 +116,8 @@ def append_base_context(
111116
if sequence[i] == "T":
112117
boolean_dict[f"{ref}_A_site"][i] = True
113118
else:
114-
print(
115-
"Error: top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
119+
logger.error(
120+
"Top or bottom strand of conversion could not be determined. Ensure this value is in the Reference name."
116121
)
117122

118123
for site_type in site_types:

src/smftools/preprocessing/append_binary_layer_by_base_context.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import numpy as np
22
import scipy.sparse as sp
33

4+
from smftools.logging_utils import get_logger
5+
6+
logger = get_logger(__name__)
7+
48

59
def append_binary_layer_by_base_context(
610
adata,
@@ -65,7 +69,10 @@ def append_binary_layer_by_base_context(
6569
def _col_mask_or_warn(colname):
6670
if colname not in adata.var.columns:
6771
if verbose:
68-
print(f"Warning: var column '{colname}' not found; treating as all-False mask.")
72+
logger.warning(
73+
"Var column '%s' not found; treating as all-False mask.",
74+
colname,
75+
)
6976
return np.zeros(n_vars, dtype=bool)
7077
vals = adata.var[colname].values
7178
# coerce truthiness
@@ -86,7 +93,7 @@ def _col_mask_or_warn(colname):
8693
X = adata.X
8794
if sp.issparse(X):
8895
if verbose:
89-
print("Converting sparse X to dense array for layer construction (temporary).")
96+
logger.info("Converting sparse X to dense array for layer construction (temporary).")
9097
X = X.toarray()
9198
X = np.asarray(X, dtype=np.float32)
9299

@@ -149,13 +156,13 @@ def _col_mask_or_warn(colname):
149156
def _filled_positions(arr):
150157
return int(np.sum(~np.isnan(arr)))
151158

152-
print("Layer build summary (non-NaN cell counts):")
153-
print(f" GpC: {_filled_positions(masked_gpc)}")
154-
print(f" CpG: {_filled_positions(masked_cpg)}")
155-
print(f" GpC+CpG combined: {_filled_positions(combined_sum)}")
156-
print(f" C: {_filled_positions(masked_any_c)}")
157-
print(f" other_C: {_filled_positions(masked_other_c)}")
158-
print(f" A: {_filled_positions(masked_a)}")
159+
logger.info("Layer build summary (non-NaN cell counts):")
160+
logger.info(" GpC: %s", _filled_positions(masked_gpc))
161+
logger.info(" CpG: %s", _filled_positions(masked_cpg))
162+
logger.info(" GpC+CpG combined: %s", _filled_positions(combined_sum))
163+
logger.info(" C: %s", _filled_positions(masked_any_c))
164+
logger.info(" other_C: %s", _filled_positions(masked_other_c))
165+
logger.info(" A: %s", _filled_positions(masked_a))
159166

160167
# mark as done
161168
adata.uns[uns_flag] = True

src/smftools/preprocessing/binarize_on_Youden.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from smftools.logging_utils import get_logger
2+
3+
logger = get_logger(__name__)
4+
5+
16
def binarize_on_Youden(
27
adata,
38
ref_column: str = "Reference_strand",
@@ -40,7 +45,7 @@ def binarize_on_Youden(
4045
ref_labels = adata.obs[ref_column].to_numpy()
4146

4247
for ref in references:
43-
print(f"Binarizing on Youden statistics for {ref}")
48+
logger.info("Binarizing on Youden statistics for %s", ref)
4449

4550
ref_mask = ref_labels == ref
4651
if not np.any(ref_mask):
@@ -84,9 +89,10 @@ def binarize_on_Youden(
8489
binarized[ref_mask, :] = block_out
8590

8691
adata.layers[output_layer_name] = binarized
87-
print(
88-
f"Finished binarization → stored in adata.layers['{output_layer_name}'] "
89-
f"(mask_failed_positions={mask_failed_positions})"
92+
logger.info(
93+
"Finished binarization → stored in adata.layers['%s'] (mask_failed_positions=%s)",
94+
output_layer_name,
95+
mask_failed_positions,
9096
)
9197

9298

src/smftools/preprocessing/binary_layers_to_ohe.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## binary_layers_to_ohe
22

3+
from smftools.logging_utils import get_logger
4+
5+
logger = get_logger(__name__)
6+
37
## Conversion SMF Specific
48
def binary_layers_to_ohe(adata, binary_layers, stack="hstack"):
59
"""
@@ -23,7 +27,7 @@ def binary_layers_to_ohe(adata, binary_layers, stack="hstack"):
2327
N_binary_layer = [layer for layer in binary_layers if layer == "N_binary_encoding"]
2428
# Add the N_binary_encoding layer to the end of the list of binary layers
2529
all_binary_layers = ACGT_binary_layers + N_binary_layer
26-
print(f"Found {all_binary_layers} layers in adata")
30+
logger.info("Found %s layers in adata", all_binary_layers)
2731

2832
# Extract the layers
2933
layers = [adata.layers[layer_name] for layer_name in all_binary_layers]

src/smftools/preprocessing/calculate_coverage.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
from smftools.logging_utils import get_logger
2+
3+
logger = get_logger(__name__)
4+
5+
16
def calculate_coverage(
27
adata,
38
ref_column="Reference_strand",
@@ -34,7 +39,7 @@ def calculate_coverage(
3439

3540
# Loop over references
3641
for ref in references:
37-
print(f"Assessing positional coverage across samples for {ref} reference")
42+
logger.info("Assessing positional coverage across samples for %s reference", ref)
3843

3944
# Subset to current category
4045
ref_mask = adata.obs[ref_column] == ref

src/smftools/preprocessing/calculate_position_Youden.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
## calculate_position_Youden
22
## Calculating and applying position level thresholds for methylation calls to binarize the SMF data
3+
from smftools.logging_utils import get_logger
4+
5+
logger = get_logger(__name__)
36
def calculate_position_Youden(
47
adata,
58
positive_control_sample=None,
@@ -37,7 +40,7 @@ def calculate_position_Youden(
3740
references = adata.obs[ref_column].cat.categories
3841
# Iterate over each category in the specified obs_column
3942
for ref in references:
40-
print(f"Calculating position Youden statistics for {ref}")
43+
logger.info("Calculating position Youden statistics for %s", ref)
4144
# Subset to keep only reads associated with the category
4245
ref_subset = adata[adata.obs[ref_column] == ref]
4346
# Iterate over positive and negative control samples
@@ -58,7 +61,7 @@ def calculate_position_Youden(
5861
threshold = np.percentile(sorted_column, infer_on_percentile)
5962
control_subset = ref_subset[ref_subset.obs[inference_variable] <= threshold, :]
6063
elif not infer_on_percentile and not control:
61-
print(
64+
logger.error(
6265
"Can not threshold Anndata on Youden threshold. Need to either provide control samples or set infer_on_percentile to True"
6366
)
6467
return
@@ -152,4 +155,4 @@ def calculate_position_Youden(
152155
True if i > J_threshold else False for i in J_max_list
153156
]
154157

155-
print("Finished calculating position Youden statistics")
158+
logger.info("Finished calculating position Youden statistics")

0 commit comments

Comments
 (0)