Skip to content

Commit 4a8fa6f

Browse files
committed
Addressing copilot comments
1 parent da13b90 commit 4a8fa6f

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

python/src/gpdm/core.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,6 @@
7979
# Standard scientific stack
8080
import numpy as np
8181
import pandas as pd
82-
import matplotlib.pyplot as plt
83-
import matplotlib.gridspec as gridspec
8482

8583
# sklearn GP: GaussianProcessRegressor wraps the kernel math and optimizer
8684
from sklearn.gaussian_process import GaussianProcessRegressor
@@ -101,9 +99,6 @@
10199
# when very few CpGs fall in a domain; we handle these gracefully.
102100
warnings.filterwarnings("ignore", category=UserWarning)
103101

104-
# Module-level logger; callers can silence with:
105-
# logging.getLogger("gpdm").setLevel(logging.CRITICAL)
106-
logging.basicConfig(level=logging.INFO, format="%(message)s")
107102
log = logging.getLogger("gpdm")
108103

109104

@@ -1626,6 +1621,9 @@ def plot_results(
16261621
if results is None:
16271622
raise ValueError("Run analysis first")
16281623

1624+
import matplotlib.pyplot as plt
1625+
import matplotlib.gridspec as gridspec
1626+
16291627
# Color palette for dark or light theme
16301628
if dark_theme:
16311629
c = {
@@ -1837,6 +1835,8 @@ def plot_comparison(
18371835
if self.results_naive is None or self.results_annotation is None:
18381836
raise ValueError("Run with method='both' first")
18391837

1838+
import matplotlib.pyplot as plt
1839+
18401840
fig, axes = plt.subplots(2, 2, figsize=figsize, facecolor="#0a0e1a")
18411841
fig.suptitle("Naive GP vs Annotation-Aware GP",
18421842
color="#e2e8f0", fontsize=14, fontweight=700,

python/src/gpdm_analysis.py

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -128,9 +128,6 @@ def read_region_from_h5(h5file, samples, chrom, start, stop):
128128
all_names = f['meta/samples/names'].asstr()[:]
129129
col_idx = f['meta/samples/col_idx'][:]
130130

131-
# Flat array of genomic start positions across all chromosomes
132-
start_pos = f['meta/start'][:]
133-
134131
# Build a lookup: sample name → integer column index in beta/values
135132
name_to_col = dict(zip(all_names, col_idx))
136133

@@ -160,8 +157,8 @@ def read_region_from_h5(h5file, samples, chrom, start, stop):
160157
row_start = prefix[chrom_idx] # first H5 row for this chromosome
161158
row_end = prefix[chrom_idx + 1] # one past last H5 row for this chromosome
162159

163-
# Extract just this chromosome's genomic positions
164-
chrom_pos = start_pos[row_start:row_end]
160+
# Read only this chromosome's positions (not the whole genome)
161+
chrom_pos = f['meta/start'][row_start:row_end]
165162

166163
# Binary search within the chromosome's position array to find the
167164
# subarray that falls within [start, stop)
@@ -283,15 +280,27 @@ def run_gpdm(params):
283280
# missing values (those that survived the nan_threshold filter).
284281
nan_count = int(np.isnan(beta_matrix).sum()) # total NaNs before imputation (for metadata)
285282
if nan_count > 0:
283+
# Drop probes that are all-NaN in either group to avoid RuntimeWarnings
284+
# from nanmean (which would write to stderr and break run_python())
285+
keep = np.ones(beta_matrix.shape[1], dtype=bool)
286+
for grp in ('group1', 'group2'):
287+
mask = groups == grp
288+
all_nan = np.all(np.isnan(beta_matrix[mask, :]), axis=0)
289+
keep &= ~all_nan
290+
if not np.all(keep):
291+
beta_matrix = beta_matrix[:, keep]
292+
positions = positions[keep]
293+
294+
# Impute remaining per-sample NaNs with per-group column mean
286295
for grp in ('group1', 'group2'):
287-
mask = groups == grp # boolean row mask for this group
288-
grp_data = beta_matrix[mask, :] # (n_grp_samples, n_probes)
289-
grp_means = np.nanmean(grp_data, axis=0) # per-probe mean ignoring NaN
290-
for j in range(grp_data.shape[1]): # iterate over probe columns
296+
mask = groups == grp
297+
grp_data = beta_matrix[mask, :]
298+
grp_means = np.nanmean(grp_data, axis=0)
299+
for j in range(grp_data.shape[1]):
291300
nans = np.isnan(grp_data[:, j])
292301
if np.any(nans):
293-
grp_data[nans, j] = grp_means[j] # replace NaN with column mean
294-
beta_matrix[mask, :] = grp_data # write imputed values back
302+
grp_data[nans, j] = grp_means[j]
303+
beta_matrix[mask, :] = grp_data
295304

296305
# --- Step 3: Initialize GPDM analysis object ---
297306
analysis = RegionalDMAnalysis(chrom=chrom, start=start, end=stop)

0 commit comments

Comments
 (0)