@@ -9,21 +9,21 @@ library(tidyverse)
9
9
10
10
debug = F
11
11
12
- # system.time({
13
- # if (debug){
14
- # tag = paste("_", "default", sep="")
15
- # } else {
16
- # # get options tag
17
- # args = commandArgs(trailingOnly = TRUE)
18
- # tag = args[1]
19
- # input_file = args[2]
20
- # if ( substr(tag, 2, 3) == "--"){
21
- # stop("Please specify an option tag (e.g. \"default\", \"i20e5\")")
22
- # }
23
- # }
24
-
25
- tag = ' E'
26
- input_file = ' all_splicing_variants_E.bed'
12
+ system.time({
13
+ if (debug ){
14
+ tag = paste(" _" , " default" , sep = " " )
15
+ } else {
16
+ # get options tag
17
+ args = commandArgs(trailingOnly = TRUE )
18
+ tag = args [1 ]
19
+ input_file = args [2 ]
20
+ if ( substr(tag , 2 , 3 ) == " --" ){
21
+ stop(" Please specify an option tag (e.g. \" default\" , \" i20e5\" )" )
22
+ }
23
+ }
24
+
25
+ # tag = 'E'
26
+ # input_file = '/Users/kcotto/Desktop/CHOL/ all_splicing_variants_E.bed'
27
27
28
28
# All splicing relevant variants (union of rows from variants.bed files; add column with comma-separated list of sample names)
29
29
all_splicing_variants = unique(data.table :: fread(input_file ), sep = ' \t ' , header = T , stringsAsFactors = FALSE )
@@ -187,14 +187,14 @@ get_mean <- function(x){
187
187
return (x )
188
188
}
189
189
190
- x <- mapply(get_mean , regtools_data $ norm_scores_non )
191
- regtools_data $ mean_norm_score_non <- x
192
-
193
190
get_sd <- function (x ){
194
191
x <- sd(as.numeric(x ))
195
192
return (x )
196
193
}
197
194
195
+ x <- mapply(get_mean , regtools_data $ norm_scores_non )
196
+ regtools_data $ mean_norm_score_non <- x
197
+
198
198
x <- mapply(get_sd , regtools_data $ norm_scores_non )
199
199
regtools_data $ sd_norm_score_non <- x
200
200
@@ -222,21 +222,54 @@ regtools_data$mean_norm_score_variant <- x
222
222
x <- mapply(get_sd , regtools_data $ norm_scores_variant )
223
223
regtools_data $ sd_norm_score_variant <- x
224
224
225
+ get_min <- function (x ){
226
+ x <- min(as.numeric(x ))
227
+ return (x )
228
+ }
229
+
230
+ x <- mapply(get_min , regtools_data $ norm_scores_variant )
231
+ regtools_data $ min_norm_score_variant <- x
232
+
225
233
print(" test7" )
226
234
227
235
# ############### calculate p-values ############################################
228
236
237
+ # calculate using mean
229
238
a <- function (x ){
230
- variant_norm_score = as.numeric(unlist(strsplit(x [[' norm_scores_variant' ]], ' ,' , fixed = TRUE )))
239
+ variant_norm_score = mean( as.numeric(unlist(strsplit(x [[' norm_scores_variant' ]], ' ,' , fixed = TRUE ) )))
231
240
if (length(x [[' norm_scores_non' ]]) < = 1 ){
232
241
return (0 )
233
242
}
234
243
235
244
all_norm_scores = c(x $ norm_scores_non , variant_norm_score )
236
245
countable = rank(all_norm_scores )
237
246
num_samples = str_count(x $ norm_scores_variant , ' ,' ) + 1
238
- non_variant_norm_scores_ranked = head(countable , (- 1 * num_samples ))
239
- variant_norm_score_ranked = tail(countable , num_samples )
247
+ non_variant_norm_scores_ranked = head(countable , - 1 )
248
+ variant_norm_score_ranked = tail(countable , 1 )
249
+ histinfo = hist(non_variant_norm_scores_ranked ,
250
+ breaks = seq(0.5 , max(non_variant_norm_scores_ranked )+ 1.5 , by = 1 ), plot = F )
251
+ mids = histinfo $ mids
252
+ cd = cumsum(histinfo $ density )
253
+ underestimate = max(which(mids < = variant_norm_score_ranked ))
254
+ pvalue = 1 - cd [underestimate ]
255
+ return (pvalue )
256
+ }
257
+
258
+ # calculate using min
259
+ b <- function (x ){
260
+ variant_norm_score = min(as.numeric(unlist(strsplit(x [[' norm_scores_variant' ]], ' ,' , fixed = TRUE ))))
261
+ if (variant_norm_score == 0 ){
262
+ return (1 )
263
+ }
264
+ if (length(x [[' norm_scores_non' ]]) < = 1 ){
265
+ return (0 )
266
+ }
267
+
268
+ all_norm_scores = c(x $ norm_scores_non , variant_norm_score )
269
+ countable = rank(all_norm_scores )
270
+ num_samples = str_count(x $ norm_scores_variant , ' ,' ) + 1
271
+ non_variant_norm_scores_ranked = head(countable , - 1 )
272
+ variant_norm_score_ranked = tail(countable , 1 )
240
273
histinfo = hist(non_variant_norm_scores_ranked ,
241
274
breaks = seq(0.5 , max(non_variant_norm_scores_ranked )+ 1.5 , by = 1 ), plot = F )
242
275
mids = histinfo $ mids
@@ -246,7 +279,8 @@ a <- function(x){
246
279
return (pvalue )
247
280
}
248
281
249
- regtools_data $ p_value <- apply(regtools_data , 1 , a )
282
+ regtools_data $ p_value_mean <- apply(regtools_data , 1 , a )
283
+ regtools_data $ p_value_min <- apply(regtools_data , 1 , b )
250
284
print(" Number of rows in data.table" )
251
285
print(length(regtools_data $ samples ))
252
286
@@ -258,12 +292,12 @@ regtools_data$norm_scores_non <- unlist(lapply(regtools_data$norm_scores_non,pas
258
292
columns_to_keep = c(' samples' , ' variant_info.x' , ' genes' , ' sample' , " chrom.x" , " start.x" , " end.x" , ' strand.x' , ' anchor.x' , ' info' ,
259
293
' names' , ' mean_norm_score_variant' , ' sd_norm_score_variant' , ' norm_scores_variant' ,
260
294
' total_score_variant' , ' mean_norm_score_non' , ' sd_norm_score_non' , ' norm_scores_non' ,
261
- ' total_score_non' , ' p_value ' )
295
+ ' total_score_non' , ' p_value_mean ' , ' p_value_min ' )
262
296
regtools_data = subset(regtools_data , select = columns_to_keep )
263
297
colnames(regtools_data ) <- c(' variant_samples' , ' variant_info' , ' genes' , ' junction_samples' , " chrom" , " start" , " end" , ' strand' , ' anchor' , ' variant_junction_info' ,
264
298
' names' , ' mean_norm_score_variant' , ' sd_norm_score_variant' , ' norm_scores_variant' ,
265
299
' total_score_variant' , ' mean_norm_score_non' , ' sd_norm_score_non' , ' norm_scores_non' ,
266
- ' total_score_non' , ' pvalue ' )
300
+ ' total_score_non' , ' p_value_mean ' , ' p_value_min ' )
267
301
regtools_data $ sd_norm_score_variant [is.na(regtools_data $ sd_norm_score_variant )] = 0
268
302
regtools_data $ mean_norm_score_non [is.na(regtools_data $ mean_norm_score_non )] = 0
269
303
regtools_data $ sd_norm_score_non [is.na(regtools_data $ sd_norm_score_non )] = 0
@@ -274,5 +308,5 @@ regtools_data = regtools_data %>% distinct()
274
308
275
309
276
310
write.table(regtools_data , file = paste(input_file , " _out.tsv" , sep = " " ), quote = FALSE , sep = ' \t ' , row.names = F )
277
- #
278
- # })
311
+
312
+ })
0 commit comments