|
| 1 | +#include <math.h> // for fabs |
| 2 | + |
| 3 | +// Find an approximate row of \hat{Sigma}^{-1} |
| 4 | + |
| 5 | +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf |
| 6 | + |
| 7 | +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 |
| 8 | +// where l is `linear_func` below |
| 9 | + |
| 10 | +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf |
| 11 | +// Therefore we don't have to negate the answer to get theta. |
| 12 | +// Update one coordinate |
| 13 | + |
| 14 | +double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ |
| 15 | + double *linear_func_ptr, /* Linear term in objective */ |
| 16 | + int *ever_active_ptr, /* Ever active set: 0-based */ |
| 17 | + int *nactive_ptr, /* Size of ever active set */ |
| 18 | + int nrow, /* how many rows in Sigma */ |
| 19 | + double bound, /* Lagrange multipler for \ell_1 */ |
| 20 | + double *theta) /* current value */ |
| 21 | +{ |
| 22 | + int irow, icol; |
| 23 | + double value = 0; |
| 24 | + double *Sigma_ptr_tmp = Sigma_ptr; |
| 25 | + double *linear_func_ptr_tmp = linear_func_ptr; |
| 26 | + double *theta_row_ptr, *theta_col_ptr; |
| 27 | + int *active_row_ptr, *active_col_ptr; |
| 28 | + int active_row, active_col; |
| 29 | + int nactive = *nactive_ptr; |
| 30 | + |
| 31 | + theta_row_ptr = theta; |
| 32 | + theta_col_ptr = theta; |
| 33 | + |
| 34 | + for (irow=0; irow<nactive; irow++) { |
| 35 | + |
| 36 | + active_row_ptr = ((int *) ever_active_ptr + irow); |
| 37 | + active_row = *active_row_ptr; |
| 38 | + theta_row_ptr = ((double *) theta + active_row); |
| 39 | + |
| 40 | + for (icol=0; icol<nactive; icol++) { |
| 41 | + |
| 42 | + active_col_ptr = ((int *) ever_active_ptr + icol); |
| 43 | + active_col = *active_col_ptr; |
| 44 | + theta_col_ptr = ((double *) theta + active_col); |
| 45 | + |
| 46 | + Sigma_ptr_tmp = ((double *) Sigma_ptr + nrow * active_col + active_row); // Matrices are column-major order |
| 47 | + |
| 48 | + value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); |
| 49 | + } |
| 50 | + value += bound * fabs((*theta_row_ptr)); // the \ell_1 term |
| 51 | + |
| 52 | + // The linear term in the objective |
| 53 | + |
| 54 | + linear_func_ptr_tmp = ((double *) linear_func_ptr + active_row); |
| 55 | + value += (*linear_func_ptr_tmp) * (*theta_row_ptr); |
| 56 | + |
| 57 | + } |
| 58 | + |
| 59 | + return(value); |
| 60 | +} |
| 61 | + |
| 62 | +int update_ever_active_qp(int coord, |
| 63 | + int *ever_active_ptr, |
| 64 | + int *nactive_ptr) { |
| 65 | + int iactive; |
| 66 | + int active_var; |
| 67 | + int nactive = *nactive_ptr; |
| 68 | + int *ever_active_ptr_tmp = ever_active_ptr; |
| 69 | + |
| 70 | + for (iactive=0; iactive<nactive; iactive++) { |
| 71 | + ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive); |
| 72 | + active_var = *ever_active_ptr_tmp; |
| 73 | + if (active_var == coord) { |
| 74 | + return(1); |
| 75 | + } |
| 76 | + } |
| 77 | + |
| 78 | + // If we haven't returned yet, this means the coord was not in |
| 79 | + // ever_active. |
| 80 | + |
| 81 | + // Add it to the active set and increment the |
| 82 | + // number of active variables |
| 83 | + |
| 84 | + ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); |
| 85 | + *ever_active_ptr_tmp = coord; |
| 86 | + *nactive_ptr += 1; |
| 87 | + |
| 88 | + return(0); |
| 89 | +} |
| 90 | + |
| 91 | +int check_KKT_qp(double *theta, /* current theta */ |
| 92 | + double *gradient_ptr, /* Sigma times theta */ |
| 93 | + int nrow, /* how many rows in Sigma */ |
| 94 | + double bound) /* Lagrange multipler for \ell_1 */ |
| 95 | +{ |
| 96 | + // First check inactive |
| 97 | + |
| 98 | + int irow; |
| 99 | + int fail = 0; |
| 100 | + double tol = 1.e-6; |
| 101 | + double *theta_ptr, *gradient_ptr_tmp; |
| 102 | + double gradient; |
| 103 | + |
| 104 | + for (irow=0; irow<nrow; irow++) { |
| 105 | + theta_ptr = ((double *) theta + irow); |
| 106 | + gradient_ptr_tmp = ((double *) gradient_ptr + irow); |
| 107 | + |
| 108 | + // Compute this coordinate of the gradient |
| 109 | + |
| 110 | + gradient = *gradient_ptr_tmp; |
| 111 | + |
| 112 | + if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound |
| 113 | + if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) { |
| 114 | + return(0); |
| 115 | + } |
| 116 | + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { |
| 117 | + return(0); |
| 118 | + } |
| 119 | + } |
| 120 | + else { |
| 121 | + if (fabs(gradient) > (1. + tol) * bound) { |
| 122 | + return(0); |
| 123 | + } |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + return(1); |
| 128 | +} |
| 129 | + |
| 130 | +double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ |
| 131 | + double *linear_func_ptr, /* Linear term in objective */ |
| 132 | + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ |
| 133 | + double *gradient_ptr, /* Sigma times theta */ |
| 134 | + int *ever_active_ptr, /* Ever active set: 0-based */ |
| 135 | + int *nactive_ptr, /* Size of ever active set */ |
| 136 | + int nrow, /* How many rows in Sigma */ |
| 137 | + double bound, /* feasibility parameter */ |
| 138 | + double *theta, /* current value */ |
| 139 | + int coord, /* which coordinate to update: 0-based */ |
| 140 | + int is_active) /* Is this part of ever_active */ |
| 141 | +{ |
| 142 | + |
| 143 | + double delta; |
| 144 | + double linear_term = 0; |
| 145 | + double value = 0; |
| 146 | + double old_value; |
| 147 | + double *Sigma_ptr_tmp; |
| 148 | + double *gradient_ptr_tmp; |
| 149 | + double *theta_ptr; |
| 150 | + int icol = 0; |
| 151 | + |
| 152 | + double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); |
| 153 | + double quadratic_term = *quadratic_ptr; |
| 154 | + |
| 155 | + gradient_ptr_tmp = ((double *) gradient_ptr + coord); |
| 156 | + linear_term = *gradient_ptr_tmp; |
| 157 | + |
| 158 | + theta_ptr = ((double *) theta + coord); |
| 159 | + old_value = *theta_ptr; |
| 160 | + |
| 161 | + // The coord entry of gradient_ptr term has a diagonal term in it: |
| 162 | + // Sigma[coord, coord] * theta[coord] |
| 163 | + // This removes it. |
| 164 | + |
| 165 | + linear_term -= quadratic_term * old_value; |
| 166 | + |
| 167 | + // Now soft-threshold the coord entry of theta |
| 168 | + |
| 169 | + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| |
| 170 | + // with q=quadratic_term and l=linear_term |
| 171 | + |
| 172 | + // With a negative linear term, solution should be |
| 173 | + // positive |
| 174 | + |
| 175 | + if (linear_term < -bound) { |
| 176 | + value = (-linear_term - bound) / quadratic_term; |
| 177 | + } |
| 178 | + else if (linear_term > bound) { |
| 179 | + value = -(linear_term - bound) / quadratic_term; |
| 180 | + } |
| 181 | + |
| 182 | + // Add to active set if necessary |
| 183 | + |
| 184 | + if (is_active == 0) { |
| 185 | + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); |
| 186 | + } |
| 187 | + |
| 188 | + // Update the linear term |
| 189 | + |
| 190 | + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { |
| 191 | + |
| 192 | + delta = value - old_value; |
| 193 | + Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); |
| 194 | + gradient_ptr_tmp = ((double *) gradient_ptr); |
| 195 | + |
| 196 | + for (icol=0; icol<nrow; icol++) { |
| 197 | + *gradient_ptr_tmp = *gradient_ptr_tmp + delta * (*Sigma_ptr_tmp); |
| 198 | + gradient_ptr_tmp += 1; |
| 199 | + Sigma_ptr_tmp += 1; |
| 200 | + } |
| 201 | + |
| 202 | + theta_ptr = ((double *) theta + coord); |
| 203 | + *theta_ptr = value; |
| 204 | + |
| 205 | + } |
| 206 | + |
| 207 | + return(value); |
| 208 | + |
| 209 | +} |
| 210 | + |
| 211 | +int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ |
| 212 | + double *linear_func_ptr, /* Linear term in objective */ |
| 213 | + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ |
| 214 | + double *gradient_ptr, /* Sigma times theta */ |
| 215 | + int *ever_active_ptr, /* Ever active set: 0-based */ |
| 216 | + int *nactive_ptr, /* Size of ever active set */ |
| 217 | + int nrow, /* How many rows in Sigma */ |
| 218 | + double bound, /* feasibility parameter */ |
| 219 | + double *theta, /* current value */ |
| 220 | + int maxiter) |
| 221 | +{ |
| 222 | + |
| 223 | + int iter = 0; |
| 224 | + int icoord = 0; |
| 225 | + int iactive = 0; |
| 226 | + int *active_ptr; |
| 227 | + |
| 228 | + int check_objective = 0; |
| 229 | + |
| 230 | + double old_value, new_value; |
| 231 | + double tol=1.e-8; |
| 232 | + |
| 233 | + if (check_objective) { |
| 234 | + |
| 235 | + old_value = objective_qp(Sigma_ptr, |
| 236 | + linear_func_ptr, |
| 237 | + ever_active_ptr, |
| 238 | + nactive_ptr, |
| 239 | + nrow, |
| 240 | + bound, |
| 241 | + theta); |
| 242 | + |
| 243 | + } |
| 244 | + |
| 245 | + |
| 246 | + for (iter=0; iter<maxiter; iter++) { |
| 247 | + |
| 248 | + // Update the active variables first |
| 249 | + |
| 250 | + active_ptr = (int *) ever_active_ptr; |
| 251 | + |
| 252 | + for (iactive=0; iactive < *nactive_ptr; iactive++) { |
| 253 | + update_one_coord_qp(Sigma_ptr, |
| 254 | + linear_func_ptr, |
| 255 | + Sigma_diag_ptr, |
| 256 | + gradient_ptr, |
| 257 | + ever_active_ptr, |
| 258 | + nactive_ptr, |
| 259 | + nrow, |
| 260 | + bound, |
| 261 | + theta, |
| 262 | + *active_ptr, |
| 263 | + 1); |
| 264 | + active_ptr++; |
| 265 | + } |
| 266 | + |
| 267 | + // Check KKT |
| 268 | + |
| 269 | + if (check_KKT_qp(theta, |
| 270 | + gradient_ptr, |
| 271 | + nrow, |
| 272 | + bound) == 1) { |
| 273 | + break; |
| 274 | + } |
| 275 | + |
| 276 | + // Update all variables |
| 277 | + |
| 278 | + for (icoord=0; icoord<nrow; icoord++) { |
| 279 | + |
| 280 | + update_one_coord_qp(Sigma_ptr, |
| 281 | + linear_func_ptr, |
| 282 | + Sigma_diag_ptr, |
| 283 | + gradient_ptr, |
| 284 | + ever_active_ptr, |
| 285 | + nactive_ptr, |
| 286 | + nrow, |
| 287 | + bound, |
| 288 | + theta, |
| 289 | + icoord, |
| 290 | + 0); |
| 291 | + } |
| 292 | + |
| 293 | + // Check KKT |
| 294 | + |
| 295 | + if (check_KKT_qp(theta, |
| 296 | + gradient_ptr, |
| 297 | + nrow, |
| 298 | + bound) == 1) { |
| 299 | + break; |
| 300 | + } |
| 301 | + |
| 302 | + if (check_objective) { |
| 303 | + new_value = objective_qp(Sigma_ptr, |
| 304 | + linear_func_ptr, |
| 305 | + ever_active_ptr, |
| 306 | + nactive_ptr, |
| 307 | + nrow, |
| 308 | + bound, |
| 309 | + theta); |
| 310 | + |
| 311 | + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { |
| 312 | + break; |
| 313 | + } |
| 314 | + old_value = new_value; |
| 315 | + } |
| 316 | + } |
| 317 | + return(iter); |
| 318 | +} |
| 319 | + |
0 commit comments