@@ -128,9 +128,6 @@ def read_region_from_h5(h5file, samples, chrom, start, stop):
128128 all_names = f ['meta/samples/names' ].asstr ()[:]
129129 col_idx = f ['meta/samples/col_idx' ][:]
130130
131- # Flat array of genomic start positions across all chromosomes
132- start_pos = f ['meta/start' ][:]
133-
134131 # Build a lookup: sample name → integer column index in beta/values
135132 name_to_col = dict (zip (all_names , col_idx ))
136133
@@ -160,8 +157,8 @@ def read_region_from_h5(h5file, samples, chrom, start, stop):
160157 row_start = prefix [chrom_idx ] # first H5 row for this chromosome
161158 row_end = prefix [chrom_idx + 1 ] # one past last H5 row for this chromosome
162159
163- # Extract just this chromosome's genomic positions
164- chrom_pos = start_pos [row_start :row_end ]
160+ # Read only this chromosome's positions (not the whole genome)
161+ chrom_pos = f [ 'meta/start' ] [row_start :row_end ]
165162
166163 # Binary search within the chromosome's position array to find the
167164 # subarray that falls within [start, stop)
@@ -283,15 +280,27 @@ def run_gpdm(params):
283280 # missing values (those that survived the nan_threshold filter).
284281 nan_count = int (np .isnan (beta_matrix ).sum ()) # total NaNs before imputation (for metadata)
285282 if nan_count > 0 :
283+ # Drop probes that are all-NaN in either group to avoid RuntimeWarnings
284+ # from nanmean (which would write to stderr and break run_python())
285+ keep = np .ones (beta_matrix .shape [1 ], dtype = bool )
286+ for grp in ('group1' , 'group2' ):
287+ mask = groups == grp
288+ all_nan = np .all (np .isnan (beta_matrix [mask , :]), axis = 0 )
289+ keep &= ~ all_nan
290+ if not np .all (keep ):
291+ beta_matrix = beta_matrix [:, keep ]
292+ positions = positions [keep ]
293+
294+ # Impute remaining per-sample NaNs with per-group column mean
286295 for grp in ('group1' , 'group2' ):
287- mask = groups == grp # boolean row mask for this group
288- grp_data = beta_matrix [mask , :] # (n_grp_samples, n_probes)
289- grp_means = np .nanmean (grp_data , axis = 0 ) # per-probe mean ignoring NaN
290- for j in range (grp_data .shape [1 ]): # iterate over probe columns
296+ mask = groups == grp
297+ grp_data = beta_matrix [mask , :]
298+ grp_means = np .nanmean (grp_data , axis = 0 )
299+ for j in range (grp_data .shape [1 ]):
291300 nans = np .isnan (grp_data [:, j ])
292301 if np .any (nans ):
293- grp_data [nans , j ] = grp_means [j ] # replace NaN with column mean
294- beta_matrix [mask , :] = grp_data # write imputed values back
302+ grp_data [nans , j ] = grp_means [j ]
303+ beta_matrix [mask , :] = grp_data
295304
296305 # --- Step 3: Initialize GPDM analysis object ---
297306 analysis = RegionalDMAnalysis (chrom = chrom , start = start , end = stop )
0 commit comments