
Commit 2719de4

committed: trying to reduce memory consumption
1 parent 14f3822 · commit 2719de4

File tree: 8 files changed, +65 -47 lines

8 files changed

+65
-47
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -11,3 +11,4 @@
 dev/false_positive_study.RData
 dev/false_positive_study.pdf
 dev/*rds
+temp*

R/ppcSeq.R

Lines changed: 22 additions & 14 deletions
@@ -180,6 +180,7 @@ vb_iterative = function(model,
 output_samples,
 iter,
 tol_rel_obj,
+additional_parameters_to_save,
 ...) {
 res = NULL
 i = 0
@@ -190,8 +191,9 @@ vb_iterative = function(model,
 output_samples = output_samples,
 iter = iter,
 tol_rel_obj = tol_rel_obj,
+sample_file = "temp_stan_sampling.txt",
+pars=c("counts_rng", "exposure_rate", additional_parameters_to_save),
 ...
-#, pars=c("counts_rng", "exposure_rate", additional_parameters_to_save)
 )
 boolFalse <- T
 return(my_res)
@@ -392,7 +394,6 @@ add_deleterious_if_covariate_exists = function(input.df, X){
 )
 }
 
-
 merge_results = function(res_discovery, res_test, formula, gene_column, value_column, sample_column, do_check_only_on_detrimental){
 
 res_discovery %>%
@@ -462,7 +463,6 @@ merge_results = function(res_discovery, res_test, formula, gene_column, value_co
 )
 }
 
-
 # Select only significant genes plus background for efficient normalisation
 # Input: tibble
 # Ouyput: tibble
@@ -524,7 +524,8 @@ run_model = function(model, full_bayes, chains, how_many_posterior_draws, inits_
 "counts_rng",
 "exposure_rate",
 additional_parameters_to_save
-)
+),
+sample_file = "temp_stan_sampling.txt"
 )
 )
 }
@@ -711,7 +712,10 @@ do_inference = function(my_df,
 inits_fx = "random",
 prior_from_discovery = tibble(`.variable` = character(),
 mean = numeric(),
-sd = numeric()), pass_fit = F, tol_rel_obj = 0.01) {
+sd = numeric()),
+pass_fit = F,
+tol_rel_obj = 0.01,
+write_on_disk = F) {
 
 writeLines(sprintf("executing %s", "do_inference"))
 
@@ -821,6 +825,7 @@ do_inference = function(my_df,
 CP = ncol(counts_package)
 
 # Run model
+writeLines(sprintf("- Roughly the memory allocation for the fit object is %s Gb", object.size(1:(S * how_many_to_check * how_many_posterior_draws))/1e9))
 
 # Set up environmental variable for threading
 Sys.setenv("STAN_NUM_THREADS" = my_cores)
@@ -844,7 +849,8 @@ do_inference = function(my_df,
 "counts_rng",
 "exposure_rate",
 additional_parameters_to_save
-)
+),
+sample_file = switch(write_on_disk %>% `!` %>% sum(1), "temp_stan_sampling.txt", NULL)
 ),
 
 # VB Repeat strategy for failures of vb
@@ -854,14 +860,12 @@ do_inference = function(my_df,
 output_samples = how_many_posterior_draws,
 iter = 50000,
 tol_rel_obj = 0.005,
-pars = c(
-"counts_rng",
-"exposure_rate",
-additional_parameters_to_save
-)
+additional_parameters_to_save = additional_parameters_to_save
 )
 )
 
+writeLines("Fit object successfully loaded in memory. Going forward to parsing fir object")
+
 # Parse and return
 fit %>%
 parse_fit(adj_prob_theshold) %>%
@@ -1022,7 +1026,8 @@ ppc_seq = function(input.df,
 pass_fit = F,
 do_check_only_on_detrimental = length(parse_formula(formula)) > 0,
 tol_rel_obj = 0.01,
-just_discovery = F
+just_discovery = F,
+write_on_disk = F
 ) {
 # Prepare column same enquo
 sample_column = enquo(sample_column)
@@ -1122,7 +1127,9 @@ ppc_seq = function(input.df,
 intercept_shift_scale,
 additional_parameters_to_save,
 adj_prob_theshold = 0.05,
-pass_fit = pass_fit, tol_rel_obj = tol_rel_obj
+pass_fit = pass_fit,
+tol_rel_obj = tol_rel_obj,
+write_on_disk = write_on_disk
 )
 
 # For building some figure I just need the discovery run, return prematurely
@@ -1177,7 +1184,8 @@ ppc_seq = function(input.df,
 to_exclude = to_exclude,
 save_generated_quantities = save_generated_quantities,
 tol_rel_obj = tol_rel_obj,
-truncation_compensation = 0.7352941 # Taken by approximation study
+truncation_compensation = 0.7352941, # Taken by approximation study
+write_on_disk = write_on_disk
 )
 
 # Merge results and return
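For context, a minimal sketch of the rstan::vb call pattern these changes move towards: restricting pars to the quantities that are actually parsed downstream, and streaming draws to a sample_file on disk. The compiled_model and stan_data objects below are hypothetical placeholders, not objects exported by this package.

library(rstan)

# Hypothetical compiled stanmodel and data list, for illustration only
# compiled_model <- stan_model("inst/stan/negBinomial_MPI.stan")
# stan_data      <- list(...)

fit <- vb(
  compiled_model,
  data           = stan_data,
  output_samples = 500,
  iter           = 50000,
  tol_rel_obj    = 0.01,
  # Keep only the parameters needed downstream, instead of every model quantity
  pars = c("counts_rng", "exposure_rate"),
  # Write draws to a file as they are produced
  sample_file = "temp_stan_sampling.txt"
)

Restricting pars is the main lever for a smaller fit object; whether sample_file also lowers peak memory depends on the rstan version, which may be why the commit gates it behind the new write_on_disk argument in do_inference.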

dev/HPC_execute_draw_tcga_qq_plots.R

Lines changed: 15 additions & 0 deletions
@@ -102,6 +102,21 @@ foreach(r = 1:6) %do% {
 
 x = readRDS("dev/draw_qq_TCGA_with_covariates_1.rds")
 
+TCGA_tbl %>%
+
+# Filter
+filter(`CAPRA-S` %>% is.na %>% `!`) %>%
+separate(sample, c("data base", "laboratory", "patient"), sep="-", remove = F) %>%
+inner_join(
+(.) %>% distinct(sample, laboratory) %>% count(laboratory) %>% filter(n >= 8) %>% select(-n)
+) %>%
+mutate(risk = `CAPRA-S` <= 3) %>%
+
+# Do check
+mutate(do_check = (!`house keeping`) & run==1) %>%
+
+format_input(input.df, formula, sample_column, gene_column, value_column, do_check_column, significance_column, how_many_negative_controls)
+
 x %>%
 attr("fit") %>%
 rstan::summary() %$% summary %>%

inst/stan/negBinomial_MPI.stan

Lines changed: 10 additions & 18 deletions
@@ -172,13 +172,12 @@ transformed data {
 }
 parameters {
 
-
 // Overall properties of the data
-real lambda_mu_raw; // So is compatible with logGamma prior
+real<offset=lambda_mu_mu> lambda_mu; // So is compatible with logGamma prior
 real<lower=0> lambda_sigma;
 real lambda_skew;
 
-vector[S] exposure_rate_raw;
+vector<multiplier = exposure_rate_multiplier>[S] exposure_rate;
 
 // Gene-wise properties of the data
 row_vector[G] intercept;
@@ -195,22 +194,15 @@ parameters {
 }
 transformed parameters {
 
-// For better adaptation
-real lambda_mu = lambda_mu_raw + lambda_mu_mu;
-//row_vector[G] intercept = (intercept_raw * intercept_shift_scale[2]) + intercept_shift_scale[1];
-vector[S] exposure_rate = exposure_rate_raw * exposure_rate_multiplier;
-
 // Sigma
 vector[G] sigma = 1.0 ./ exp(sigma_raw) ;
 matrix[C,G] alpha = merge_coefficients(intercept, alpha_sub_1, alpha_2, C, S, G);
-matrix[S,G] lambda_log_param = X * alpha;
-
-
+matrix[S,G] lambda_log_param = X * alpha;
 }
 
 model {
 
-lambda_mu_raw ~ normal(0,2);
+lambda_mu ~ normal(lambda_mu_mu,2);
 lambda_sigma ~ normal(0,2);
 lambda_skew ~ normal(0,1);
 
@@ -219,15 +211,15 @@ model {
 sigma_sigma ~ normal(0,2);
 
 // Gene-wise properties of the data
-to_vector(intercept) ~ skew_normal(lambda_mu,lambda_sigma, lambda_skew);
+to_vector(intercept) ~ skew_normal(lambda_mu + lambda_mu_mu ,lambda_sigma, lambda_skew);
 if(C>=2) alpha_sub_1 ~ double_exponential(0,1);
 if(C>=3) to_vector(alpha_2) ~ normal(0,2.5);
 
-sigma_raw ~ normal(sigma_slope * alpha[1,] + sigma_intercept,sigma_sigma);
+sigma_raw ~ normal(sigma_slope * intercept + sigma_intercept,sigma_sigma);
 
 // Exposure prior
-exposure_rate_raw ~ normal(0,1);
-sum(exposure_rate_raw) ~ normal(0, 0.001 * S);
+exposure_rate ~ normal(0,1);
+sum(exposure_rate) ~ normal(0, 0.001 * S);
 
 //Gene-wise properties of the data
 target += sum(map_rect(
@@ -264,9 +256,9 @@ model {
 
 }
 generated quantities{
-vector[G] counts_rng[S];
+vector[how_many_to_check] counts_rng[S];
 
-for(g in 1:G) for(s in 1:S)
+for(g in 1:how_many_to_check) for(s in 1:S)
 counts_rng[s,g] = neg_binomial_2_log_rng(exposure_rate[s] + lambda_log_param[s,g], sigma[g] * truncation_compensation);
 
 }
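A rough back-of-envelope sketch in R (with hypothetical dimensions) of why shrinking counts_rng from all G genes to the how_many_to_check genes reduces the fit object, in the same spirit as the object.size estimate printed in do_inference above:

# Hypothetical dimensions, for illustration only
S <- 50                          # samples
G <- 20000                       # genes in the data set
how_many_to_check <- 1000        # genes actually checked
how_many_posterior_draws <- 500
bytes_per_value <- 8             # one double in an extracted draws matrix

# counts_rng drawn for every gene:
S * G * how_many_posterior_draws * bytes_per_value / 1e9                  # ~4 Gb
# counts_rng drawn only for the genes being checked:
S * how_many_to_check * how_many_posterior_draws * bytes_per_value / 1e9  # ~0.2 Gb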

man/do_inference.Rd

Lines changed: 2 additions & 1 deletion
Generated file; diff not rendered by default.

man/ppc_seq.Rd

Lines changed: 1 addition & 1 deletion
Generated file; diff not rendered by default.

man/vb_iterative.Rd

Lines changed: 2 additions & 1 deletion
Generated file; diff not rendered by default.

tests/testthat/test-ppcSeq.R

Lines changed: 12 additions & 12 deletions
@@ -6,18 +6,18 @@ test_that("dummy",expect_equal(1,1))
 #
 # test_that("Quick test",{
 #
-# FDR_threshold = 0.01
-#
-# res =
-# ppcSeq::counts %>%
-# mutate(is_significant = FDR < FDR_threshold) %>%
-# ppc_seq(
-# formula = ~ Label,
-# significance_column = PValue,
-# do_check_column = is_significant,
-# value_column = value,
-# percent_false_positive_genes = "5%", tol_rel_obj = 0.01
-# )
+FDR_threshold = 0.01
+
+res =
+ppcSeq::counts %>%
+mutate(is_significant = FDR < FDR_threshold) %>%
+ppc_seq(
+formula = ~ Label,
+significance_column = PValue,
+do_check_column = is_significant,
+value_column = value,
+percent_false_positive_genes = "5%", tol_rel_obj = 0.01
+)
 #
 # expect_equal(
 #
