Skip to content

Commit 074ab4b

Browse files
committed
rejection sampling tests
1 parent 8fb556d commit 074ab4b

File tree

12 files changed

+113
-238
lines changed

12 files changed

+113
-238
lines changed

R/vendor.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ vendor <- function(path = "./src/vendor") {
6363
cpp4r_version,
6464
Sys.Date()
6565
),
66-
file.path(path2, "00-cpp4r-vendoring-info.txt")
66+
file.path(path, "00-cpp4r-vendoring-info.txt")
6767
)
6868

6969
file.copy(

Rcppbenchgibbs/R/Rcppbenchgibbs-package.R

Lines changed: 0 additions & 3 deletions
This file was deleted.

Rcppbenchgibbs/R/base.R

Lines changed: 0 additions & 27 deletions
This file was deleted.

Rcppbenchgibbs/src/main.cpp

Lines changed: 0 additions & 37 deletions
This file was deleted.

cpp11benchgibbs/src/main.cpp

Lines changed: 0 additions & 50 deletions
This file was deleted.

cpp4rbenchgibbs/src/main.cpp

Lines changed: 0 additions & 61 deletions
This file was deleted.

cpp4rexamples/DESCRIPTION

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@ Package: cpp4rexamples
22
Type: Package
33
Title: ADD TITLE
44
Version: 0.1
5-
Authors@R: c(
6-
person(
7-
given = "YOUR",
8-
family = "NAME",
9-
role = c("aut", "cre"),
10-
email = "[email protected]",
11-
comment = c(ORCID = "0000-0001-0002-0003"))
12-
)
5+
Authors@R:
6+
person("Mauricio", "Vargas Sepulveda", role = c("aut","cre","cph"),
7+
email = "[email protected]",
8+
comment = c(ORCID = "0000-0003-1017-7574")
139
Suggests:
1410
knitr,
1511
rmarkdown

cpp4rsampling/src/main.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,32 @@ class local_rng {
3030
// Manage R's RNG state
3131
local_rng rng_state;
3232

33-
// Dynamic vector that grows as we accept samples
33+
// Pre-calculate acceptance rate for better initial allocation
34+
double z_lower = (lower - mu) / sigma;
35+
double z_upper = (upper - mu) / sigma;
36+
double acceptance_rate = Rf_pnorm5(z_upper, 0.0, 1.0, 1, 0) - Rf_pnorm5(z_lower, 0.0, 1.0, 1, 0);
37+
38+
// Pre-allocate based on expected number of samples needed
39+
// Add 20% buffer for variance + ensure minimum reasonable size
40+
R_xlen_t estimated_needed = static_cast<R_xlen_t>(n_samples / acceptance_rate * 1.2);
41+
estimated_needed = std::max(estimated_needed, static_cast<R_xlen_t>(n_samples));
42+
3443
writable::doubles accepted_samples;
44+
accepted_samples.reserve(estimated_needed);
3545

3646
// Keep sampling until we have enough accepted samples
37-
while (accepted_samples.size() < n_samples) {
47+
// Cast to int once for faster comparison in tight loop
48+
int target_samples = static_cast<int>(n_samples);
49+
50+
while (static_cast<int>(accepted_samples.size()) < target_samples) {
3851
// Generate candidate from normal distribution
3952
double candidate = Rf_rnorm(mu, sigma);
4053

41-
// Accept if within bounds
42-
if (candidate >= lower && candidate <= upper) {
54+
// Fast bounds check: single branch for most common case
55+
if (__builtin_expect(candidate >= lower && candidate <= upper, 1)) {
4356
accepted_samples.push_back(candidate);
4457
}
45-
// If rejected, we just continue sampling - this is why we need dynamic growth!
58+
// If rejected, we just continue sampling
4659
}
4760

4861
return accepted_samples;

dev/benchmark.R

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,29 @@
1-
library(cpp11test)
2-
library(cpp4rtest)
1+
library(cpp11sampling)
2+
library(cpp4rsampling)
3+
library(Rcppsampling)
4+
library(bench)
5+
library(purrr)
36

4-
cases <- expand.grid(
5-
len = 1e7,
6-
vector = c("normal", "altrep"),
7-
method = c("for", "foreach", "accumulate"),
8-
pkg = c("cpp4r", "Rcpp"),
9-
stringsAsFactors = FALSE
10-
)
11-
12-
# Add special case
13-
cases <- rbind(list(len = 1e7, vector = "normal", method = "for2", pkg = "cpp4r"), cases)
7+
rejection_sizes <- seq(25000, 100000, by = 25000)
148

15-
b_sum <- bench::press(
16-
.grid = cases,
17-
{
18-
seq_real <- function(x) as.numeric(seq_len(x))
19-
funs <- c("normal" = rnorm, "altrep" = seq_real)
20-
x <- funs[[vector]](len)
21-
fun <- match.fun(sprintf("%ssum_dbl_%s_", ifelse(pkg == "cpp4r", "", paste0(pkg, "_")), method))
22-
bench::mark(
23-
fun(x)
9+
rejection_bench <- map_df(
10+
rejection_sizes,
11+
function(n_samples) {
12+
cat("Target samples:", n_samples, "\n")
13+
d <- bench::mark(
14+
# "R" = { set.seed(123); cpp4rsampling::rejection_sampling(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5) },
15+
"cpp4r" = { set.seed(123); cpp4rsampling::rejection_sampling_cpp4r(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5) },
16+
"cpp11" = { set.seed(123); cpp11sampling::rejection_sampling_cpp11(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5) },
17+
"Rcpp" = { set.seed(123); Rcppsampling::rejection_sampling_Rcpp(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5) },
18+
iterations = 10
2419
)
25-
}
26-
)[c("pkg", "method", "vector", "median", "mem_alloc")]
2720

28-
saveRDS(b_sum, "sum.Rds", version = 2)
21+
d$len <- n_samples
22+
23+
d
24+
}
25+
)
2926

30-
grid <- expand.grid(len = seq(10000, 50000, by = 10000), pkg = c("cpp4r", "cpp11", "Rcpp"), stringsAsFactors = FALSE)
31-
b_release <- bench::press(.grid = grid, {
32-
fun <- match.fun(sprintf("%s_release_", pkg))
33-
bench::mark(
34-
fun(len),
35-
min_iterations = 100
36-
)
37-
})[c("len", "pkg", "median")]
27+
rejection_bench[, c("expression", "median", "mem_alloc", "len")]
3828

39-
saveRDS(b_release, "release.Rds", version = 2)
29+
saveRDS(rejection_bench, "vignettes/rejection_bench.rds", compress = "xz")

dev/rejection-benchmark.R

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
library(cpp11benchgibbs)
2+
library(cpp4rbenchgibbs)
3+
library(Rcppbenchgibbs)
4+
library(bench)
5+
library(purrr)
6+
7+
cat("=== Benchmarking Dynamic Vector Growth with push_back ===\n")
8+
cat("cpp4r/cpp11: Efficient - reserves extra space when growing vectors\n")
9+
cat("Rcpp: Inefficient - copies entire vector on each push_back operation\n\n")
10+
11+
cat("=== 1. Rejection Sampling Performance ===\n")
12+
cat("Perfect use case: We don't know how many candidates we'll need to generate\n")
13+
cat("to get the desired number of accepted samples.\n\n")
14+
15+
# Test rejection sampling with different target sample sizes
16+
rejection_sizes <- c(1000, 5000, 10000, 25000)
17+
18+
rejection_bench <- map_df(
19+
rejection_sizes,
20+
function(n_samples) {
21+
cat("Target samples:", n_samples, "\n")
22+
bench::mark(
23+
"cpp4r" = cpp4rbenchgibbs::rejection_sampling_cpp4r(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5),
24+
"cpp11" = cpp11benchgibbs::rejection_sampling_cpp11(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5),
25+
"Rcpp" = Rcppbenchgibbs::rejection_sampling_Rcpp(n_samples, mu = 0, sigma = 1, lower = -1.5, upper = 1.5),
26+
min_iterations = 5,
27+
max_iterations = 20,
28+
check = FALSE # Results will differ due to randomness
29+
)
30+
}
31+
)
32+
33+
print(rejection_bench[c("expression", "median", "mem_alloc", "n_itr")])
34+
35+
cat("\n=== 2. Bootstrap with Variable Sample Sizes ===\n")
36+
cat("Another natural use case: Bootstrap samples with varying sizes\n\n")
37+
38+
# Create test data
39+
test_data <- rnorm(1000)
40+
41+
bootstrap_bench <- bench::mark(
42+
"cpp4r" = cpp4rbenchgibbs::bootstrap_variable_cpp4r(test_data, min_size = 50, max_size = 200, n_bootstrap = 1000),
43+
"cpp11" = cpp11benchgibbs::bootstrap_variable_cpp11(test_data, min_size = 50, max_size = 200, n_bootstrap = 1000),
44+
"Rcpp" = Rcppbenchgibbs::bootstrap_variable_Rcpp(test_data, min_size = 50, max_size = 200, n_bootstrap = 1000),
45+
min_iterations = 3,
46+
max_iterations = 10,
47+
check = FALSE
48+
)
49+
50+
print(bootstrap_bench[c("expression", "median", "mem_alloc", "n_itr")])
51+
52+
cat("\n=== Key Performance Insights ===\n")
53+
cat("1. Rejection Sampling: Rcpp should show quadratic time complexity\n")
54+
cat(" due to vector copying, while cpp4r/cpp11 remain linear\n")
55+
cat("2. Memory Usage: Rcpp will allocate much more memory due to\n")
56+
cat(" repeated copying of growing vectors\n")
57+
cat("3. Real-world Impact: These patterns appear in Monte Carlo methods,\n")
58+
cat(" adaptive algorithms, and data processing pipelines\n\n")
59+
60+
# Demonstrate the acceptance rate for rejection sampling
61+
cat("=== Rejection Sampling Statistics ===\n")
62+
set.seed(42)
63+
sample1 <- cpp4rbenchgibbs::rejection_sampling_cpp4r(1000, mu = 0, sigma = 1, lower = -1.5, upper = 1.5)
64+
acceptance_rate <- pnorm(1.5) - pnorm(-1.5) # Theoretical acceptance rate
65+
cat("Theoretical acceptance rate for [-1.5, 1.5] bounds:", round(acceptance_rate, 3), "\n")
66+
cat("Average candidates needed per accepted sample:", round(1/acceptance_rate, 1), "\n")
67+
cat("For 25,000 samples, expect ~", round(25000/acceptance_rate), "candidates to be generated\n")

0 commit comments

Comments
 (0)