@@ -261,12 +261,41 @@ def lmm(time, volume, treatment, drug_name):
261261 dict: A dictionary containing the fit object and the specific coefficient value.
262262 """
263263
264+ # --- DEBUG: inputs to LMM ---
265+ print ("\n [DBG][LMM] incoming arrays" )
266+ print (f"[DBG][LMM] len(time)={ len (time )} , len(volume)={ len (volume )} , len(treatment)={ len (treatment )} , drug_name={ drug_name } " )
267+
268+
264269 data = pd .DataFrame ({'model_id' :['model' ]* len (time ),\
265270 'volume' :volume ,\
266271 'time' :time ,\
267272 'exp_type' :treatment })
268273
269- data = data .dropna ()
274+
275+ print ("[DBG][LMM] raw data shape:" , data .shape )
276+ print ("[DBG][LMM] raw dtypes:\n " , data .dtypes )
277+ print ("[DBG][LMM] NA counts:\n " , data .isna ().sum ())
278+
279+ data ['log_volume' ] = np .log (data ['volume' ])
280+ n_nonfinite_log = (~ np .isfinite (data ['log_volume' ])).sum ()
281+ print (f"[DBG][LMM] non-finite log_volume count: { n_nonfinite_log } " )
282+
283+ # categories
284+ data ['exp_type' ] = data ['exp_type' ].astype ('category' )
285+ data ['exp_type' ] = pd .Categorical (data ['exp_type' ],
286+ categories = ['control' , drug_name ],
287+ ordered = True )
288+ print ("[DBG][LMM] exp_type categories:" , list (data ['exp_type' ].cat .categories ))
289+ print ("[DBG][LMM] exp_type value_counts:\n " , data ['exp_type' ].value_counts (dropna = False ))
290+
291+ # time variation overall and by treatment
292+ print (f"[DBG][LMM] time min/max: { data ['time' ].min ()} / { data ['time' ].max ()} " )
293+ print ("[DBG][LMM] time describe by exp_type:\n " ,
294+ data .groupby ('exp_type' , dropna = False )['time' ].describe ())
295+
296+ # groups
297+ print ("[DBG][LMM] unique model_id count:" , data ['model_id' ].nunique ())
298+ print ("[DBG][LMM] model_id value_counts:\n " , data ['model_id' ].value_counts ())
270299
271300 ##create data frame from these 4 vectors
272301 required_columns = ["model_id" , "volume" , "time" , "exp_type" ]
@@ -286,6 +315,11 @@ def lmm(time, volume, treatment, drug_name):
286315 #print(data['exp_type'].cat.categories)
287316 # Fit the model
288317 model = mixedlm (formula , data , groups = data ['model_id' ])
318+ print ("[DBG][LMM] exp_type counts:\n " , data ['exp_type' ].value_counts (dropna = False ))
319+ print ("[DBG][LMM] time min/max:" , data ['time' ].min (), data ['time' ].max ())
320+ print ("[DBG][LMM] volume<=0:" , (data ['volume' ]<= 0 ).sum (), " nonfinite log_volume:" , (~ np .isfinite (np .log (data ['volume' ]))).sum ())
321+ print ("[DBG][LMM] groups:" , data ['model_id' ].nunique ())
322+
289323 fit = model .fit ()
290324
291325 # Get the coefficient for the interaction term 'time:exp_type'
@@ -384,9 +418,25 @@ def get_drug_stats(df, control='control'):
384418 else :
385419 singleres .append (treat_abc )
386420
387- #llm
421+ # --- DEBUG: before LMM ---
422+ print ("\n [DBG] -------- Drug block --------" )
423+ print (f"[DBG] experiment={ name [0 ]} model_id={ mod } drug={ d } " )
424+ print (f"[DBG] control rows: { len (ctl_data )} treated rows: { len (d_data )} " )
425+
426+ # Quick sanity on time / volume
427+ print (f"[DBG] control time min/max: { ctl_time .min () if len (ctl_time )> 0 else None } / { ctl_time .max () if len (ctl_time )> 0 else None } " )
428+ print (f"[DBG] treat time min/max: { treat_time .min () if len (treat_time )> 0 else None } / { treat_time .max () if len (treat_time )> 0 else None } " )
429+
430+ print (f"[DBG] control volume<=0: { (ctl_volume <= 0 ).sum () if len (ctl_volume )> 0 else None } " )
431+ print (f"[DBG] treat volume<=0: { (treat_volume <= 0 ).sum () if len (treat_volume )> 0 else None } " )
432+
433+ # Show the first few rows that will go into LMM
388434 comb = pd .concat ([ctl_data , d_data ])
389- #print(comb)
435+ print ("[DBG] comb dtypes:\n " , comb .dtypes )
436+ print ("[DBG] comb treatment counts:\n " , comb ['treatment' ].value_counts (dropna = False ))
437+ print ("[DBG] comb head:\n " , comb [['treatment' ,'time' ,'volume' ]].head ())
438+ # --- END DEBUG ---
439+
390440 lmm_res = lmm (comb .time , comb .volume , comb .treatment , d )
391441 lmm_res .update ({'sample' : mod , 'drug' : d , 'time' : np .max (treat_time ), 'time_unit' : 'days' })
392442 if '+' in d :
0 commit comments