Skip to content

Commit ef78215

Browse files
committed
added a billion debug lines to calc pdx
1 parent 87c321d commit ef78215

File tree

1 file changed

+53
-3
lines changed

1 file changed

+53
-3
lines changed

coderbuild/utils/calc_pdx_metrics.py

Lines changed: 53 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -261,12 +261,41 @@ def lmm(time, volume, treatment, drug_name):
261261
dict: A dictionary containing the fit object and the specific coefficient value.
262262
"""
263263

264+
# --- DEBUG: inputs to LMM ---
265+
print("\n[DBG][LMM] incoming arrays")
266+
print(f"[DBG][LMM] len(time)={len(time)}, len(volume)={len(volume)}, len(treatment)={len(treatment)}, drug_name={drug_name}")
267+
268+
264269
data = pd.DataFrame({'model_id':['model']*len(time),\
265270
'volume':volume,\
266271
'time':time,\
267272
'exp_type':treatment})
268273

269-
data = data.dropna()
274+
275+
print("[DBG][LMM] raw data shape:", data.shape)
276+
print("[DBG][LMM] raw dtypes:\n", data.dtypes)
277+
print("[DBG][LMM] NA counts:\n", data.isna().sum())
278+
279+
data['log_volume'] = np.log(data['volume'])
280+
n_nonfinite_log = (~np.isfinite(data['log_volume'])).sum()
281+
print(f"[DBG][LMM] non-finite log_volume count: {n_nonfinite_log}")
282+
283+
# categories
284+
data['exp_type'] = data['exp_type'].astype('category')
285+
data['exp_type'] = pd.Categorical(data['exp_type'],
286+
categories=['control', drug_name],
287+
ordered=True)
288+
print("[DBG][LMM] exp_type categories:", list(data['exp_type'].cat.categories))
289+
print("[DBG][LMM] exp_type value_counts:\n", data['exp_type'].value_counts(dropna=False))
290+
291+
# time variation overall and by treatment
292+
print(f"[DBG][LMM] time min/max: {data['time'].min()} / {data['time'].max()}")
293+
print("[DBG][LMM] time describe by exp_type:\n",
294+
data.groupby('exp_type', dropna=False)['time'].describe())
295+
296+
# groups
297+
print("[DBG][LMM] unique model_id count:", data['model_id'].nunique())
298+
print("[DBG][LMM] model_id value_counts:\n", data['model_id'].value_counts())
270299

271300
##create data frame from these 4 vectors
272301
required_columns = ["model_id", "volume", "time", "exp_type"]
@@ -286,6 +315,11 @@ def lmm(time, volume, treatment, drug_name):
286315
#print(data['exp_type'].cat.categories)
287316
# Fit the model
288317
model = mixedlm(formula, data, groups=data['model_id'])
318+
print("[DBG][LMM] exp_type counts:\n", data['exp_type'].value_counts(dropna=False))
319+
print("[DBG][LMM] time min/max:", data['time'].min(), data['time'].max())
320+
print("[DBG][LMM] volume<=0:", (data['volume']<=0).sum(), " nonfinite log_volume:", (~np.isfinite(np.log(data['volume']))).sum())
321+
print("[DBG][LMM] groups:", data['model_id'].nunique())
322+
289323
fit = model.fit()
290324

291325
# Get the coefficient for the interaction term 'time:exp_type'
@@ -384,9 +418,25 @@ def get_drug_stats(df, control='control'):
384418
else:
385419
singleres.append(treat_abc)
386420

387-
#llm
421+
# --- DEBUG: before LMM ---
422+
print("\n[DBG] -------- Drug block --------")
423+
print(f"[DBG] experiment={name[0]} model_id={mod} drug={d}")
424+
print(f"[DBG] control rows: {len(ctl_data)} treated rows: {len(d_data)}")
425+
426+
# Quick sanity on time / volume
427+
print(f"[DBG] control time min/max: {ctl_time.min() if len(ctl_time)>0 else None} / {ctl_time.max() if len(ctl_time)>0 else None}")
428+
print(f"[DBG] treat time min/max: {treat_time.min() if len(treat_time)>0 else None} / {treat_time.max() if len(treat_time)>0 else None}")
429+
430+
print(f"[DBG] control volume<=0: {(ctl_volume<=0).sum() if len(ctl_volume)>0 else None}")
431+
print(f"[DBG] treat volume<=0: {(treat_volume<=0).sum() if len(treat_volume)>0 else None}")
432+
433+
# Show the first few rows that will go into LMM
388434
comb = pd.concat([ctl_data, d_data])
389-
#print(comb)
435+
print("[DBG] comb dtypes:\n", comb.dtypes)
436+
print("[DBG] comb treatment counts:\n", comb['treatment'].value_counts(dropna=False))
437+
print("[DBG] comb head:\n", comb[['treatment','time','volume']].head())
438+
# --- END DEBUG ---
439+
390440
lmm_res = lmm(comb.time, comb.volume, comb.treatment, d)
391441
lmm_res.update({'sample': mod, 'drug': d, 'time': np.max(treat_time), 'time_unit': 'days'})
392442
if '+' in d:

0 commit comments

Comments (0)