
Commit 2719de4

committed: trying to reduce memory consumption
1 parent 14f3822 · commit 2719de4

File tree: 8 files changed, +65 -47 lines

8 files changed

+65
-47
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -11,3 +11,4 @@
 dev/false_positive_study.RData
 dev/false_positive_study.pdf
 dev/*rds
+temp*

R/ppcSeq.R

Lines changed: 22 additions & 14 deletions
@@ -180,6 +180,7 @@ vb_iterative = function(model,
 output_samples,
 iter,
 tol_rel_obj,
+additional_parameters_to_save,
 ...) {
 res = NULL
 i = 0
@@ -190,8 +191,9 @@ vb_iterative = function(model,
 output_samples = output_samples,
 iter = iter,
 tol_rel_obj = tol_rel_obj,
+sample_file = "temp_stan_sampling.txt",
+pars=c("counts_rng", "exposure_rate", additional_parameters_to_save),
 ...
-#, pars=c("counts_rng", "exposure_rate", additional_parameters_to_save)
 )
 boolFalse <- T
 return(my_res)
@@ -392,7 +394,6 @@ add_deleterious_if_covariate_exists = function(input.df, X){
 )
 }
 
-
 merge_results = function(res_discovery, res_test, formula, gene_column, value_column, sample_column, do_check_only_on_detrimental){
 
 res_discovery %>%
@@ -462,7 +463,6 @@ merge_results = function(res_discovery, res_test, formula, gene_column, value_co
 )
 }
 
-
 # Select only significant genes plus background for efficient normalisation
 # Input: tibble
 # Ouyput: tibble
@@ -524,7 +524,8 @@ run_model = function(model, full_bayes, chains, how_many_posterior_draws, inits_
 "counts_rng",
 "exposure_rate",
 additional_parameters_to_save
-)
+),
+sample_file = "temp_stan_sampling.txt"
 )
 )
 }
@@ -711,7 +712,10 @@ do_inference = function(my_df,
 inits_fx = "random",
 prior_from_discovery = tibble(`.variable` = character(),
 mean = numeric(),
-sd = numeric()), pass_fit = F, tol_rel_obj = 0.01) {
+sd = numeric()),
+pass_fit = F,
+tol_rel_obj = 0.01,
+write_on_disk = F) {
 
 writeLines(sprintf("executing %s", "do_inference"))
 
@@ -821,6 +825,7 @@ do_inference = function(my_df,
 CP = ncol(counts_package)
 
 # Run model
+writeLines(sprintf("- Roughly the memory allocation for the fit object is %s Gb", object.size(1:(S * how_many_to_check * how_many_posterior_draws))/1e9))
 
 # Set up environmental variable for threading
 Sys.setenv("STAN_NUM_THREADS" = my_cores)
@@ -844,7 +849,8 @@ do_inference = function(my_df,
 "counts_rng",
 "exposure_rate",
 additional_parameters_to_save
-)
+),
+sample_file = switch(write_on_disk %>% `!` %>% sum(1), "temp_stan_sampling.txt", NULL)
 ),
 
 # VB Repeat strategy for failures of vb
@@ -854,14 +860,12 @@ do_inference = function(my_df,
 output_samples = how_many_posterior_draws,
 iter = 50000,
 tol_rel_obj = 0.005,
-pars = c(
-"counts_rng",
-"exposure_rate",
-additional_parameters_to_save
-)
+additional_parameters_to_save = additional_parameters_to_save
 )
 )
 
+writeLines("Fit object successfully loaded in memory. Going forward to parsing fir object")
+
 # Parse and return
 fit %>%
 parse_fit(adj_prob_theshold) %>%
@@ -1022,7 +1026,8 @@ ppc_seq = function(input.df,
 pass_fit = F,
 do_check_only_on_detrimental = length(parse_formula(formula)) > 0,
 tol_rel_obj = 0.01,
-just_discovery = F
+just_discovery = F,
+write_on_disk = F
 ) {
 # Prepare column same enquo
 sample_column = enquo(sample_column)
@@ -1122,7 +1127,9 @@ ppc_seq = function(input.df,
 intercept_shift_scale,
 additional_parameters_to_save,
 adj_prob_theshold = 0.05,
-pass_fit = pass_fit, tol_rel_obj = tol_rel_obj
+pass_fit = pass_fit,
+tol_rel_obj = tol_rel_obj,
+write_on_disk = write_on_disk
 )
 
 # For building some figure I just need the discovery run, return prematurely
@@ -1177,7 +1184,8 @@ ppc_seq = function(input.df,
 to_exclude = to_exclude,
 save_generated_quantities = save_generated_quantities,
 tol_rel_obj = tol_rel_obj,
-truncation_compensation = 0.7352941 # Taken by approximation study
+truncation_compensation = 0.7352941, # Taken by approximation study
+write_on_disk = write_on_disk
 )
 
 # Merge results and return
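For context, a minimal sketch of the rstan::vb call pattern these changes move towards: restricting pars to the quantities that are actually parsed downstream, and streaming draws to a sample_file on disk. The compiled_model and stan_data objects below are hypothetical placeholders, not objects exported by this package.

library(rstan)

# Hypothetical compiled stanmodel and data list, for illustration only
# compiled_model <- stan_model("inst/stan/negBinomial_MPI.stan")
# stan_data      <- list(...)

fit <- vb(
  compiled_model,
  data           = stan_data,
  output_samples = 500,
  iter           = 50000,
  tol_rel_obj    = 0.01,
  # Keep only the parameters needed downstream, instead of every model quantity
  pars = c("counts_rng", "exposure_rate"),
  # Write draws to a file as they are produced
  sample_file = "temp_stan_sampling.txt"
)

Restricting pars is the main lever for a smaller fit object; whether sample_file also lowers peak memory depends on the rstan version, which may be why the commit gates it behind the new write_on_disk argument in do_inference.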

dev/HPC_execute_draw_tcga_qq_plots.R

Lines changed: 15 additions & 0 deletions
@@ -102,6 +102,21 @@ foreach(r = 1:6) %do% {
 
 x = readRDS("dev/draw_qq_TCGA_with_covariates_1.rds")
 
+TCGA_tbl %>%
+
+# Filter
+filter(`CAPRA-S` %>% is.na %>% `!`) %>%
+separate(sample, c("data base", "laboratory", "patient"), sep="-", remove = F) %>%
+inner_join(
+(.) %>% distinct(sample, laboratory) %>% count(laboratory) %>% filter(n >= 8) %>% select(-n)
+) %>%
+mutate(risk = `CAPRA-S` <= 3) %>%
+
+# Do check
+mutate(do_check = (!`house keeping`) & run==1) %>%
+
+format_input(input.df, formula, sample_column, gene_column, value_column, do_check_column, significance_column, how_many_negative_controls)
+
 x %>%
 attr("fit") %>%
 rstan::summary() %$% summary %>%

inst/stan/negBinomial_MPI.stan

Lines changed: 10 additions & 18 deletions
@@ -172,13 +172,12 @@ transformed data {
 }
 parameters {
 
-
 // Overall properties of the data
-real lambda_mu_raw; // So is compatible with logGamma prior
+real<offset=lambda_mu_mu> lambda_mu; // So is compatible with logGamma prior
 real<lower=0> lambda_sigma;
 real lambda_skew;
 
-vector[S] exposure_rate_raw;
+vector<multiplier = exposure_rate_multiplier>[S] exposure_rate;
 
 // Gene-wise properties of the data
 row_vector[G] intercept;
@@ -195,22 +194,15 @@ parameters {
 }
 transformed parameters {
 
-// For better adaptation
-real lambda_mu = lambda_mu_raw + lambda_mu_mu;
-//row_vector[G] intercept = (intercept_raw * intercept_shift_scale[2]) + intercept_shift_scale[1];
-vector[S] exposure_rate = exposure_rate_raw * exposure_rate_multiplier;
-
 // Sigma
 vector[G] sigma = 1.0 ./ exp(sigma_raw) ;
 matrix[C,G] alpha = merge_coefficients(intercept, alpha_sub_1, alpha_2, C, S, G);
-matrix[S,G] lambda_log_param = X * alpha;
-
-
+matrix[S,G] lambda_log_param = X * alpha;
 }
 
 model {
 
-lambda_mu_raw ~ normal(0,2);
+lambda_mu ~ normal(lambda_mu_mu,2);
 lambda_sigma ~ normal(0,2);
 lambda_skew ~ normal(0,1);
 
@@ -219,15 +211,15 @@ model {
 sigma_sigma ~ normal(0,2);
 
 // Gene-wise properties of the data
-to_vector(intercept) ~ skew_normal(lambda_mu,lambda_sigma, lambda_skew);
+to_vector(intercept) ~ skew_normal(lambda_mu + lambda_mu_mu ,lambda_sigma, lambda_skew);
 if(C>=2) alpha_sub_1 ~ double_exponential(0,1);
 if(C>=3) to_vector(alpha_2) ~ normal(0,2.5);
 
-sigma_raw ~ normal(sigma_slope * alpha[1,] + sigma_intercept,sigma_sigma);
+sigma_raw ~ normal(sigma_slope * intercept + sigma_intercept,sigma_sigma);
 
 // Exposure prior
-exposure_rate_raw ~ normal(0,1);
-sum(exposure_rate_raw) ~ normal(0, 0.001 * S);
+exposure_rate ~ normal(0,1);
+sum(exposure_rate) ~ normal(0, 0.001 * S);
 
 //Gene-wise properties of the data
 target += sum(map_rect(
@@ -264,9 +256,9 @@ model {
 
 }
 generated quantities{
-vector[G] counts_rng[S];
+vector[how_many_to_check] counts_rng[S];
 
-for(g in 1:G) for(s in 1:S)
+for(g in 1:how_many_to_check) for(s in 1:S)
 counts_rng[s,g] = neg_binomial_2_log_rng(exposure_rate[s] + lambda_log_param[s,g], sigma[g] * truncation_compensation);
 
 }
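A rough back-of-envelope sketch in R (with hypothetical dimensions) of why shrinking counts_rng from all G genes to the how_many_to_check genes reduces the fit object, in the same spirit as the object.size estimate printed in do_inference above:

# Hypothetical dimensions, for illustration only
S <- 50                          # samples
G <- 20000                       # genes in the data set
how_many_to_check <- 1000        # genes actually checked
how_many_posterior_draws <- 500
bytes_per_value <- 8             # one double in an extracted draws matrix

# counts_rng drawn for every gene:
S * G * how_many_posterior_draws * bytes_per_value / 1e9                  # ~4 Gb
# counts_rng drawn only for the genes being checked:
S * how_many_to_check * how_many_posterior_draws * bytes_per_value / 1e9  # ~0.2 Gb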

man/do_inference.Rd

Lines changed: 2 additions & 1 deletion
Generated file; diff not rendered by default.

man/ppc_seq.Rd

Lines changed: 1 addition & 1 deletion
Generated file; diff not rendered by default.

man/vb_iterative.Rd

Lines changed: 2 additions & 1 deletion
Generated file; diff not rendered by default.

tests/testthat/test-ppcSeq.R

Lines changed: 12 additions & 12 deletions
@@ -6,18 +6,18 @@ test_that("dummy",expect_equal(1,1))
 #
 # test_that("Quick test",{
 #
-# FDR_threshold = 0.01
-#
-# res =
-# ppcSeq::counts %>%
-# mutate(is_significant = FDR < FDR_threshold) %>%
-# ppc_seq(
-# formula = ~ Label,
-# significance_column = PValue,
-# do_check_column = is_significant,
-# value_column = value,
-# percent_false_positive_genes = "5%", tol_rel_obj = 0.01
-# )
+FDR_threshold = 0.01
+
+res =
+ppcSeq::counts %>%
+mutate(is_significant = FDR < FDR_threshold) %>%
+ppc_seq(
+formula = ~ Label,
+significance_column = PValue,
+do_check_column = is_significant,
+value_column = value,
+percent_false_positive_genes = "5%", tol_rel_obj = 0.01
+)
 #
 # expect_equal(
 #
