@@ -37,21 +37,34 @@ get_p(const std::vector<T> &v, const gsl_vector *params) {
3737}
3838
3939static inline auto
40- cache_log1p_factors (Regression ®, const double phi ) {
40+ set_max_r_count (Regression ®) {
4141 const auto &cumul = reg.cumul ;
4242 const auto max_itr = std::max_element (
4343 std::cbegin (cumul), std::cend (cumul), [](const auto &a, const auto &b) {
4444 return std::size (a.r_counts ) < std::size (b.r_counts );
4545 });
46- const std::size_t max_k = std::size (max_itr->r_counts );
47- auto &cache = reg.log1p_fact_v ;
46+ reg.max_r_count = std::size (max_itr->r_counts );
4847 // ADS: avoid the realloc that can happen even for resize(smaller_size)
49- if (max_k > std::size (cache))
50- cache.resize (max_k, 0.0 );
48+ if (reg.max_r_count > std::size (reg.cache ))
49+ reg.cache .resize (reg.max_r_count );
50+ }
51+
52+ static inline auto
53+ cache_log1p_factors (Regression ®, const double phi) {
54+ const std::size_t max_k = reg.max_r_count ;
55+ auto &cache = reg.cache ;
5156 for (std::size_t k = 0 ; k < max_k; ++k)
5257 cache[k] = std::log1p (phi * (k - 1.0 ));
5358}
5459
60+ static inline auto
61+ cache_dispersion_effect (Regression ®, const double phi) {
62+ const std::size_t max_k = reg.max_r_count ;
63+ auto &cache = reg.cache ;
64+ for (std::size_t k = 0 ; k < max_k; ++k)
65+ cache[k] = (k - 1.0 ) / (1.0 + phi * (k - 1.0 ));
66+ }
67+
5568[[nodiscard]] static double
5669log_likelihood (const gsl_vector *params, Regression ®) {
5770 const auto phi = logistic (gsl_vector_get (params, reg.design .n_factors ()));
@@ -64,7 +77,7 @@ log_likelihood(const gsl_vector *params, Regression &reg) {
6477 // ADS: precompute the log1p(phi * (k - 1.0)) values, which are reused for
6578 // each group.
6679 cache_log1p_factors (reg, phi);
67- const auto &log1p_fact_v = reg.log1p_fact_v ;
80+ const auto &log1p_fact_v = reg.cache ;
6881
6982 double log_lik = 0.0 ;
7083 for (std::size_t g_idx = 0 ; g_idx < n_groups; ++g_idx) {
@@ -102,10 +115,14 @@ gradient(const gsl_vector *params, Regression &reg, gsl_vector *output) {
102115 for (auto g_idx = 0u ; g_idx < n_groups; ++g_idx)
103116 p_v[g_idx] = get_p (groups[g_idx], params);
104117
118+ cache_dispersion_effect (reg, phi);
119+ const auto &dispersion_effect = reg.cache ; // (k-1)/(1 + phi(k-1))
120+
105121 // init output to zero for all factors
106122 gsl_vector_set_all (output, 0.0 );
107123 auto &data = output->data ;
108124
125+ double disp_deriv = 0.0 ;
109126 for (std::size_t g_idx = 0 ; g_idx < n_groups; ++g_idx) {
110127 const auto p = p_v[g_idx];
111128 const auto one_minus_p = 1.0 - p;
@@ -114,13 +131,26 @@ gradient(const gsl_vector *params, Regression &reg, gsl_vector *output) {
114131
115132 const auto denom_term1 = one_minus_phi * p;
116133 const auto &cumul_y = cumul[g_idx].m_counts ;
117- for (auto k = 0u ; k < std::size (cumul_y); ++k)
118- deriv += cumul_y[k] / (denom_term1 + phi * k);
134+ const auto y_lim = std::size (cumul_y);
135+ for (auto k = 0u ; k < y_lim; ++k) {
136+ const auto common_factor = cumul_y[k] / (denom_term1 + phi * k);
137+ deriv += common_factor;
138+ disp_deriv += (k - p) * common_factor;
139+ }
119140
120141 const auto denom_term2 = one_minus_phi * one_minus_p;
121142 const auto &cumul_d = cumul[g_idx].d_counts ;
122- for (auto k = 0u ; k < std::size (cumul_d); ++k)
123- deriv -= cumul_d[k] / (denom_term2 + phi * k);
143+ const auto d_lim = std::size (cumul_d);
144+ for (auto k = 0u ; k < d_lim; ++k) {
145+ const auto common_factor = cumul_d[k] / (denom_term2 + phi * k);
146+ deriv -= common_factor;
147+ disp_deriv += (k - one_minus_p) * common_factor;
148+ }
149+
150+ const auto &cumul_n = cumul[g_idx].r_counts ;
151+ const auto n_lim = std::size (cumul_n);
152+ for (auto k = 0u ; k < n_lim; ++k)
153+ disp_deriv -= cumul_n[k] * dispersion_effect[k];
124154
125155 const auto &g = groups[g_idx];
126156 const auto denom_term1_one_minus_p = denom_term1 * one_minus_p;
@@ -131,31 +161,7 @@ gradient(const gsl_vector *params, Regression &reg, gsl_vector *output) {
131161 data[fact_idx] += deriv * (denom_term1_one_minus_p * level);
132162 }
133163 }
134-
135- double deriv = 0.0 ;
136- auto cumul_itr = std::cbegin (cumul);
137- for (auto g_idx = 0u ; g_idx < n_groups; ++g_idx, ++cumul_itr) {
138- const auto p = get_p (groups[g_idx], params);
139- const auto one_minus_p = 1.0 - p;
140-
141- const auto term1 = one_minus_phi * p;
142- const auto &cumul_y = cumul_itr->m_counts ;
143- const auto y_lim = std::size (cumul_y);
144- for (auto k = 0u ; k < y_lim; ++k)
145- deriv += cumul_y[k] * (k - p) / (term1 + phi * k);
146-
147- const auto term2 = one_minus_phi * one_minus_p;
148- const auto &cumul_d = cumul_itr->d_counts ;
149- const auto d_lim = std::size (cumul_d);
150- for (auto k = 0u ; k < d_lim; ++k)
151- deriv += cumul_d[k] * (k - one_minus_p) / (term2 + phi * k);
152-
153- const auto &cumul_n = cumul_itr->r_counts ;
154- const auto n_lim = std::size (cumul_n);
155- for (auto k = 0u ; k < n_lim; ++k)
156- deriv -= cumul_n[k] * (k - 1.0 ) / (1.0 + phi * (k - 1.0 ));
157- }
158- gsl_vector_set (output, n_factors, deriv * (phi * one_minus_phi));
164+ gsl_vector_set (output, n_factors, disp_deriv * (phi * one_minus_phi));
159165}
160166
161167[[nodiscard]] static double
@@ -230,6 +236,7 @@ fit_regression_model(Regression &r, std::vector<double> &params_init) {
230236 const auto max_iter = Regression::max_iter;
231237
232238 get_cumulative (r.design .group_id , r.design .n_groups (), r.props .mc , r.cumul );
239+ set_max_r_count (r);
233240
234241 // one more than the number of factors
235242 const std::size_t n_params = r.n_factors () + 1 ;
0 commit comments