@@ -153,7 +153,7 @@ def createkey(row):
153
153
# Give the frame explicit column names; 'sample_x' and 'sample_y' are two
# distinct sample columns that both survive into the steps below.
samples_w_variant_df.columns = [
    'sample_x', 'sample_y', 'variant_info', 'chrom', 'start', 'end', 'strand', 'anchor',
    'info', 'genes', 'names', 'mean_norm_score_variant', 'sd_norm_score_variant',
    'norm_scores_variant', 'scores', 'junction', 'total_score_variant',
]

# Collect the distinct 'sample_x' values seen for each 'variant_info'.
# Series.unique() keeps first-appearance order, so the resulting list is the
# same on every run; list(set(...)) ordered string elements by their
# randomized hashes, which made the list order differ between runs.
tmp_df = (
    samples_w_variant_df
    .groupby('variant_info')[['sample_x']]
    .aggregate(lambda x: x.unique().tolist())
    .reset_index()
)

# Attach the per-variant sample list to every row of that variant.
samples_w_variant_df = pd.merge(samples_w_variant_df, tmp_df, on='variant_info')
158
158
samples_w_variant_df = samples_w_variant_df [['sample_y' , 'variant_info' , 'chrom' , 'start' , 'end' , 'strand' , 'anchor' ,
159
159
'info' , 'genes' , 'names' , 'mean_norm_score_variant' , 'sd_norm_score_variant' ,
@@ -227,12 +227,13 @@ def createkey(row):
227
227
def add_zeros(row):
    """Pad a row's normalized scores with zeros and return them as text.

    Reads three cells by position: the score list (position 1; a bare ``0``
    is treated as the single score ``[0]``) and two sample counts
    (positions 2 and 3).  ``num_of_samples`` is read from module scope.
    One zero is appended for every sample not covered by the two counts,
    the combined scores are sorted in descending order and joined with
    commas.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``, or any
            sequence supporting integer indexing.

    Returns:
        str: Comma-separated scores, largest first.
    """
    # Integer keys on a label-indexed pandas row rely on a deprecated
    # positional fallback; go through .iloc when the row provides it.
    cells = row.iloc if hasattr(row, 'iloc') else row

    norm_scores = cells[1]
    if norm_scores == 0:
        norm_scores = [0]
    samples_wout_variant = cells[2]
    samples_w_variant = cells[3]

    # The counts may arrive as floats (e.g. after a fillna); np.repeat only
    # accepts an integer repeat count, so cast explicitly.
    num_of_zeros_toadd = int(num_of_samples - samples_wout_variant - samples_w_variant)
    zeros = np.repeat(0, num_of_zeros_toadd).tolist()

    padded_scores = sorted(norm_scores + zeros, reverse=True)
    return ','.join(map(str, padded_scores))
238
239
@@ -241,15 +242,15 @@ def add_zeros(row):
241
242
# The temporary frame is no longer needed; drop the name so it can be freed.
del tmp_df
242
243
243
244
def get_mean(row):
    """Return the mean of the comma-separated scores at position 33 of a row.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``, or any
            sequence supporting integer indexing.  The cell at position 33
            must be a string of comma-separated numbers.
            NOTE(review): position 33 is presumably the padded
            non-variant score column -- confirm against the column order.

    Returns:
        The arithmetic mean of the parsed numbers (``np.mean``).
    """
    # Integer keys on a label-indexed pandas row rely on a deprecated
    # positional fallback; go through .iloc when the row provides it.
    cells = row.iloc if hasattr(row, 'iloc') else row
    values = [float(i) for i in cells[33].split(',')]
    return np.mean(values)
248
249
249
250
# Row-wise mean of the score string parsed by get_mean.
master_df['mean_norm_score_non'] = master_df.apply(get_mean, axis=1)
250
251
251
252
def get_sd(row):
    """Return the standard deviation of the scores at position 33 of a row.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``, or any
            sequence supporting integer indexing.  The cell at position 33
            must be a string of comma-separated numbers.
            NOTE(review): position 33 is presumably the padded
            non-variant score column -- confirm against the column order.

    Returns:
        The standard deviation of the parsed numbers as computed by
        ``np.std`` with its default arguments.
    """
    # Integer keys on a label-indexed pandas row rely on a deprecated
    # positional fallback; go through .iloc when the row provides it.
    cells = row.iloc if hasattr(row, 'iloc') else row
    values = [float(i) for i in cells[33].split(',')]
    return np.std(values)
@@ -265,23 +266,42 @@ def get_min(row):
265
266
# Row-wise value computed by get_min.
master_df['min_norm_score_variant'] = master_df.apply(get_min, axis=1)
266
267
267
268
def get_pvalue_mean(row):
    """Return an empirical p-value for the value at position 10 of a row.

    The comma-separated numbers at position 33 form the reference
    distribution.  The value at position 10 is ranked against it with
    ``stats.percentileofscore`` and converted to ``1 - percentile / 100``.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``, or any
            sequence supporting integer indexing.
            NOTE(review): position 10 is presumably the variant's mean
            normalized score -- confirm against the column order.

    Returns:
        str: The p-value rendered as text with square brackets removed.
    """
    # Integer keys on a label-indexed pandas row rely on a deprecated
    # positional fallback; go through .iloc when the row provides it.
    cells = row.iloc if hasattr(row, 'iloc') else row
    values = [float(i) for i in cells[33].split(',')]
    mean_value = cells[10]

    pvalue = 1 - (stats.percentileofscore(values, mean_value) / 100.0)

    # np.array_str expects an ndarray, while pvalue may be a plain scalar;
    # np.asarray covers both.  The raw string avoids invalid-escape warnings.
    return re.sub(r'[\[\]]', '', np.array_str(np.asarray(pvalue)))
274
276
275
277
# Row-wise p-value computed by get_pvalue_mean.
master_df['p_value_mean'] = master_df.apply(get_pvalue_mean, axis=1)
276
278
277
279
def get_pvalue_min(row):
    """Return an empirical p-value for the value at position 36 of a row.

    The comma-separated numbers at position 33 form the reference
    distribution.  The value at position 36 is ranked against it with
    ``stats.percentileofscore`` and converted to ``1 - percentile / 100``.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``, or any
            sequence supporting integer indexing.
            NOTE(review): position 36 is presumably the variant's minimum
            normalized score -- confirm against the column order.

    Returns:
        str: The p-value rendered as text with square brackets removed.
    """
    # Integer keys on a label-indexed pandas row rely on a deprecated
    # positional fallback; go through .iloc when the row provides it.
    cells = row.iloc if hasattr(row, 'iloc') else row
    values = [float(i) for i in cells[33].split(',')]
    min_value = cells[36]

    pvalue = 1 - (stats.percentileofscore(values, min_value) / 100.0)

    # np.array_str expects an ndarray, while pvalue may be a plain scalar;
    # np.asarray covers both.  The raw string avoids invalid-escape warnings.
    return re.sub(r'[\[\]]', '', np.array_str(np.asarray(pvalue)))
284
287
285
288
# Score each row with get_pvalue_min. This line previously called
# get_pvalue_mean, which made 'p_value_min' a copy of 'p_value_mean'.
master_df['p_value_min'] = master_df.apply(get_pvalue_min, axis=1)
286
289
290
# Keep only the columns that are reported, in output order.
master_df = master_df[['variant_samples', 'variant_info_x', 'genes_x', 'junction_samples',
                       'chrom_x', 'start_x', 'end_x', 'strand_x', 'anchor_x', 'info',
                       'names', 'mean_norm_score_variant', 'sd_norm_score_variant',
                       'norm_scores_variant', 'total_score_variant', 'mean_norm_score_non',
                       'sd_norm_score_non', 'new_norm_scores', 'total_score_non',
                       'p_value_mean', 'p_value_min']]

# Rename positionally to the final headers (merge suffixes dropped).
master_df.columns = ['variant_samples', 'variant_info', 'genes', 'junction_samples',
                     'chrom', 'start', 'end', 'strand', 'anchor', 'variant_junction_info',
                     'names', 'mean_norm_score_variant', 'sd_norm_score_variant',
                     'norm_scores_variant', 'total_score_variant', 'mean_norm_score_non',
                     'sd_norm_score_non', 'norm_scores_non', 'total_score_non',
                     'p_value_mean', 'p_value_min']


def _flatten_list_cell(cell):
    """Turn a list-valued cell into a scalar that can be written to the TSV."""
    if not isinstance(cell, list):
        return cell
    if len(cell) == 1:
        # Single-element lists are unwrapped to the element itself.
        return cell[0]
    # Keep every element of longer lists instead of only the first one.
    return ','.join(map(str, cell))


master_df = master_df.applymap(_flatten_list_cell)
master_df = master_df.fillna(0)

# to_csv requires a one-character delimiter: a single tab.
master_df.to_csv('test_results.tsv', sep='\t', index=False)
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print().
master_df.info()

# NOTE: rows with more than one entry in 'variant_samples' used to lose the
# extra samples because every list cell was reduced to its first element;
# _flatten_list_cell joins multi-element lists with commas instead.
0 commit comments