Skip to content

Commit 2550bbd

Browse files
authored
Merge pull request #3312 from stan-dev/feature/3299-improved-ESS-Rhat
Feature/3299 improved ess rhat
2 parents 6df13e9 + f67a142 commit 2550bbd

18 files changed

+3947
-579
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#ifndef STAN_ANALYZE_MCMC_CHECK_CHAINS_HPP
2+
#define STAN_ANALYZE_MCMC_CHECK_CHAINS_HPP
3+
4+
#include <stan/math/prim.hpp>
5+
#include <cmath>
6+
#include <cstdlib>
7+
#include <limits>
8+
#include <utility>
9+
#include <vector>
10+
11+
namespace stan {
12+
namespace analyze {
13+
14+
/**
15+
* Checks that values across all matrix columns finite and non-identical.
16+
*
17+
* @param chains matrix of draws, one column per chain
18+
* @return bool true if OK, false otherwise
19+
*/
20+
inline bool is_finite_and_varies(const Eigen::MatrixXd chains) {
21+
size_t num_chains = chains.cols();
22+
size_t num_samples = chains.rows();
23+
Eigen::VectorXd first_draws = Eigen::VectorXd::Zero(num_chains);
24+
for (std::size_t i = 0; i < num_chains; ++i) {
25+
first_draws(i) = chains.col(i)(0);
26+
for (int j = 0; j < num_samples; ++j) {
27+
if (!std::isfinite(chains.col(i)(j)))
28+
return false;
29+
}
30+
if (chains.col(i).isApproxToConstant(first_draws(i))) {
31+
return false;
32+
}
33+
}
34+
if (num_chains > 1 && first_draws.isApproxToConstant(first_draws(0))) {
35+
return false;
36+
}
37+
return true;
38+
}
39+
40+
} // namespace analyze
41+
} // namespace stan
42+
43+
#endif

src/stan/analyze/mcmc/compute_potential_scale_reduction.hpp

Lines changed: 0 additions & 236 deletions
Original file line numberDiff line numberDiff line change
@@ -17,156 +17,6 @@
1717
namespace stan {
1818
namespace analyze {
1919

20-
/**
21-
* Computes normalized average ranks for draws. Transforming them to normal
22-
* scores using inverse normal transformation and a fractional offset. Based on
23-
* paper https://arxiv.org/abs/1903.08008
24-
* @param chains stores chains in columns
25-
* @return normal scores for average ranks of draws
26-
*/
27-
inline Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& chains) {
28-
const Eigen::Index rows = chains.rows();
29-
const Eigen::Index cols = chains.cols();
30-
const Eigen::Index size = rows * cols;
31-
32-
std::vector<std::pair<double, int>> value_with_index(size);
33-
34-
for (Eigen::Index i = 0; i < size; ++i) {
35-
value_with_index[i] = {chains(i), i};
36-
}
37-
38-
std::sort(value_with_index.begin(), value_with_index.end());
39-
40-
Eigen::MatrixXd rank_matrix = Eigen::MatrixXd::Zero(rows, cols);
41-
42-
// Assigning average ranks
43-
for (Eigen::Index i = 0; i < size; ++i) {
44-
// Handle ties by averaging ranks
45-
Eigen::Index j = i + 1;
46-
double sum_ranks = j;
47-
Eigen::Index count = 1;
48-
49-
while (j < size && value_with_index[j].first == value_with_index[i].first) {
50-
sum_ranks += j + 1; // Rank starts from 1
51-
++j;
52-
++count;
53-
}
54-
double avg_rank = sum_ranks / count;
55-
boost::math::normal_distribution<double> dist;
56-
for (std::size_t k = i; k < j; ++k) {
57-
double p = (avg_rank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
58-
const Eigen::Index index = value_with_index[k].second;
59-
rank_matrix(index) = boost::math::quantile(dist, p);
60-
}
61-
i = j - 1; // Skip over tied elements
62-
}
63-
return rank_matrix;
64-
}
65-
66-
/**
67-
* Computes square root of marginal posterior variance of the estimand by the
68-
* weigted average of within-chain variance W and between-chain variance B.
69-
*
70-
* @param chains stores chains in columns
71-
* @return square root of ((N-1)/N)W + B/N
72-
*/
73-
inline double rhat(const Eigen::MatrixXd& chains) {
74-
const Eigen::Index num_chains = chains.cols();
75-
const Eigen::Index num_draws = chains.rows();
76-
77-
Eigen::RowVectorXd within_chain_means = chains.colwise().mean();
78-
double across_chain_mean = within_chain_means.mean();
79-
double between_variance
80-
= num_draws
81-
* (within_chain_means.array() - across_chain_mean).square().sum()
82-
/ (num_chains - 1);
83-
double within_variance =
84-
// Divide each row by chains and get sum of squares for each chain
85-
// (getting a vector back)
86-
((chains.rowwise() - within_chain_means)
87-
.array()
88-
.square()
89-
.colwise()
90-
// divide each sum of square by num_draws, sum the sum of squares,
91-
// and divide by num chains
92-
.sum()
93-
/ (num_draws - 1.0))
94-
.sum()
95-
/ num_chains;
96-
97-
return sqrt((between_variance / within_variance + num_draws - 1) / num_draws);
98-
}
99-
100-
/**
101-
* Computes the potential scale reduction (Rhat) using rank based diagnostic for
102-
* the specified parameter across all kept samples. Based on paper
103-
* https://arxiv.org/abs/1903.08008
104-
*
105-
* Current implementation assumes draws are stored in contiguous
106-
* blocks of memory. Chains are trimmed from the back to match the
107-
* length of the shortest chain.
108-
*
109-
* @param chain_begins stores pointers to arrays of chains
110-
* @param chain_sizes stores sizes of chains
111-
* @return potential scale reduction for the specified parameter
112-
*/
113-
inline std::pair<double, double> compute_potential_scale_reduction_rank(
114-
const std::vector<const double*>& chain_begins,
115-
const std::vector<size_t>& chain_sizes) {
116-
std::vector<const double*> nonzero_chain_begins;
117-
std::vector<std::size_t> nonzero_chain_sizes;
118-
nonzero_chain_begins.reserve(chain_begins.size());
119-
nonzero_chain_sizes.reserve(chain_sizes.size());
120-
for (size_t i = 0; i < chain_sizes.size(); ++i) {
121-
if (chain_sizes[i]) {
122-
nonzero_chain_begins.push_back(chain_begins[i]);
123-
nonzero_chain_sizes.push_back(chain_sizes[i]);
124-
}
125-
}
126-
if (!nonzero_chain_sizes.size()) {
127-
return {std::numeric_limits<double>::quiet_NaN(),
128-
std::numeric_limits<double>::quiet_NaN()};
129-
}
130-
std::size_t num_nonzero_chains = nonzero_chain_sizes.size();
131-
std::size_t min_num_draws = nonzero_chain_sizes[0];
132-
for (std::size_t chain = 1; chain < num_nonzero_chains; ++chain) {
133-
min_num_draws = std::min(min_num_draws, nonzero_chain_sizes[chain]);
134-
}
135-
136-
// check if chains are constant; all equal to first draw's value
137-
bool are_all_const = false;
138-
Eigen::VectorXd init_draw = Eigen::VectorXd::Zero(num_nonzero_chains);
139-
Eigen::MatrixXd draws_matrix(min_num_draws, num_nonzero_chains);
140-
141-
for (std::size_t chain = 0; chain < num_nonzero_chains; chain++) {
142-
Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draws(
143-
nonzero_chain_begins[chain], nonzero_chain_sizes[chain]);
144-
145-
for (std::size_t n = 0; n < min_num_draws; n++) {
146-
if (!std::isfinite(draws(n))) {
147-
return {std::numeric_limits<double>::quiet_NaN(),
148-
std::numeric_limits<double>::quiet_NaN()};
149-
}
150-
draws_matrix(n, chain) = draws(n);
151-
}
152-
153-
init_draw(chain) = draws(0);
154-
are_all_const |= !draws.isApproxToConstant(draws(0));
155-
}
156-
// If all chains are constant then return NaN
157-
if (are_all_const && init_draw.isApproxToConstant(init_draw(0))) {
158-
return {std::numeric_limits<double>::quiet_NaN(),
159-
std::numeric_limits<double>::quiet_NaN()};
160-
}
161-
162-
double rhat_bulk = rhat(rank_transform(draws_matrix));
163-
double rhat_tail = rhat(rank_transform(
164-
(draws_matrix.array() - math::quantile(draws_matrix.reshaped(), 0.5))
165-
.abs()));
166-
167-
return std::make_pair(rhat_bulk, rhat_tail);
168-
}
169-
17020
/**
17121
* Computes the potential scale reduction (Rhat) for the specified
17222
* parameter across all kept samples.
@@ -248,33 +98,9 @@ inline double compute_potential_scale_reduction(
24898
* num_chains / (num_chains - 1);
24999
double var_within = chain_var.mean();
250100

251-
// rewrote [(n-1)*W/n + B/n]/W as (n-1+ B/W)/n
252101
return sqrt((var_between / var_within + num_draws - 1) / num_draws);
253102
}
254103

255-
/**
256-
* Computes the potential scale reduction (Rhat) using rank based diagnostic for
257-
* the specified parameter across all kept samples. Based on paper
258-
* https://arxiv.org/abs/1903.08008
259-
*
260-
* See more details in Stan reference manual section "Potential
261-
* Scale Reduction". http://mc-stan.org/users/documentation
262-
*
263-
* Current implementation assumes draws are stored in contiguous
264-
* blocks of memory. Chains are trimmed from the back to match the
265-
* length of the shortest chain. Argument size will be broadcast to
266-
* same length as draws.
267-
*
268-
* @param chain_begins stores pointers to arrays of chains
269-
* @param size stores sizes of chains
270-
* @return potential scale reduction for the specified parameter
271-
*/
272-
inline std::pair<double, double> compute_potential_scale_reduction_rank(
273-
const std::vector<const double*>& chain_begins, size_t size) {
274-
std::vector<size_t> sizes(chain_begins.size(), size);
275-
return compute_potential_scale_reduction_rank(chain_begins, sizes);
276-
}
277-
278104
/**
279105
* Computes the potential scale reduction (Rhat) for the specified
280106
* parameter across all kept samples.
@@ -298,42 +124,6 @@ inline double compute_potential_scale_reduction(
298124
return compute_potential_scale_reduction(draws, sizes);
299125
}
300126

301-
/**
302-
* Computes the potential scale reduction (Rhat) using rank based diagnostic for
303-
* the specified parameter across all kept samples. Based on paper
304-
* https://arxiv.org/abs/1903.08008
305-
*
306-
* When the number of total draws N is odd, the (N+1)/2th draw is ignored.
307-
*
308-
* See more details in Stan reference manual section "Potential
309-
* Scale Reduction". http://mc-stan.org/users/documentation
310-
*
311-
* Current implementation assumes draws are stored in contiguous
312-
* blocks of memory. Chains are trimmed from the back to match the
313-
* length of the shortest chain.
314-
*
315-
* @param chain_begins stores pointers to arrays of chains
316-
* @param chain_sizes stores sizes of chains
317-
* @return potential scale reduction for the specified parameter
318-
*/
319-
inline std::pair<double, double> compute_split_potential_scale_reduction_rank(
320-
const std::vector<const double*>& chain_begins,
321-
const std::vector<size_t>& chain_sizes) {
322-
size_t num_chains = chain_sizes.size();
323-
size_t num_draws = chain_sizes[0];
324-
for (size_t chain = 1; chain < num_chains; ++chain) {
325-
num_draws = std::min(num_draws, chain_sizes[chain]);
326-
}
327-
328-
std::vector<const double*> split_draws
329-
= split_chains(chain_begins, chain_sizes);
330-
331-
size_t half = std::floor(num_draws / 2.0);
332-
std::vector<size_t> half_sizes(2 * num_chains, half);
333-
334-
return compute_potential_scale_reduction_rank(split_draws, half_sizes);
335-
}
336-
337127
/**
338128
* Computes the split potential scale reduction (Rhat) for the
339129
* specified parameter across all kept samples. When the number of
@@ -366,32 +156,6 @@ inline double compute_split_potential_scale_reduction(
366156
return compute_potential_scale_reduction(split_draws, half_sizes);
367157
}
368158

369-
/**
370-
* Computes the potential scale reduction (Rhat) using rank based diagnostic for
371-
* the specified parameter across all kept samples. Based on paper
372-
* https://arxiv.org/abs/1903.08008
373-
*
374-
* When the number of total draws N is odd, the (N+1)/2th draw is ignored.
375-
*
376-
* See more details in Stan reference manual section "Potential
377-
* Scale Reduction". http://mc-stan.org/users/documentation
378-
*
379-
* Current implementation assumes draws are stored in contiguous
380-
* blocks of memory. Chains are trimmed from the back to match the
381-
* length of the shortest chain. Argument size will be broadcast to
382-
* same length as draws.
383-
*
384-
* @param chain_begins stores pointers to arrays of chains
385-
* @param size stores sizes of chains
386-
* @return potential scale reduction for the specified parameter
387-
*/
388-
inline std::pair<double, double> compute_split_potential_scale_reduction_rank(
389-
const std::vector<const double*>& chain_begins, size_t size) {
390-
size_t num_chains = chain_begins.size();
391-
std::vector<size_t> sizes(num_chains, size);
392-
return compute_split_potential_scale_reduction_rank(chain_begins, sizes);
393-
}
394-
395159
/**
396160
* Computes the split potential scale reduction (Rhat) for the
397161
* specified parameter across all kept samples. When the number of
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#ifndef STAN_ANALYZE_MCMC_RANK_NORMALIZATION_HPP
2+
#define STAN_ANALYZE_MCMC_RANK_NORMALIZATION_HPP
3+
4+
#include <stan/math/prim.hpp>
5+
#include <boost/math/distributions/normal.hpp>
6+
#include <algorithm>
7+
#include <cmath>
8+
#include <vector>
9+
#include <limits>
10+
11+
namespace stan {
12+
namespace analyze {
13+
14+
/**
15+
* Computes normalized average ranks for pooled draws. Normal scores computed
16+
* using inverse normal transformation and a fractional offset. Based on paper
17+
* https://arxiv.org/abs/1903.08008
18+
*
19+
* @param chains matrix of draws, one column per chain
20+
* @return normal scores for average ranks of draws
21+
*/
22+
inline Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& chains) {
23+
const Eigen::Index rows = chains.rows();
24+
const Eigen::Index cols = chains.cols();
25+
const Eigen::Index size = rows * cols;
26+
27+
std::vector<std::pair<double, int>> value_with_index(size);
28+
for (Eigen::Index i = 0; i < size; ++i) {
29+
value_with_index[i] = {chains(i), i};
30+
}
31+
32+
std::sort(value_with_index.begin(), value_with_index.end());
33+
Eigen::MatrixXd rank_matrix = Eigen::MatrixXd::Zero(rows, cols);
34+
// Assigning average ranks
35+
for (Eigen::Index i = 0; i < size; ++i) {
36+
// Handle ties by averaging ranks
37+
Eigen::Index j = i + 1;
38+
double sum_ranks = j;
39+
Eigen::Index count = 1;
40+
41+
while (j < size && value_with_index[j].first == value_with_index[i].first) {
42+
sum_ranks += j + 1; // Rank starts from 1
43+
++j;
44+
++count;
45+
}
46+
double avg_rank = sum_ranks / count;
47+
boost::math::normal_distribution<double> dist;
48+
for (std::size_t k = i; k < j; ++k) {
49+
double p = (avg_rank - 0.375) / (size + 0.25);
50+
const Eigen::Index index = value_with_index[k].second;
51+
rank_matrix(index) = boost::math::quantile(dist, p);
52+
}
53+
i = j - 1; // Skip over tied elements
54+
}
55+
return rank_matrix;
56+
}
57+
58+
} // namespace analyze
59+
} // namespace stan
60+
61+
#endif

0 commit comments

Comments
 (0)