bug found in qp solver -- look at tests/test_QP.R

jonathan-taylor · jonathan-taylor · commit 83b04268b9f3 · 2017-10-26T22:41:38.000-07:00
diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE
@@ -44,4 +44,4 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict,
 importFrom("stats", "coef", "df", "lm", "pf")
 importFrom("stats", "glm", "residuals", "vcov")
 importFrom("Rcpp", "sourceCpp")
-
+importFrom("distr", "Norm", "DExp")
diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
@@ -327,6 +327,7 @@ debiasingMatrix = function(Xinfo,               # could be X or t(X) %*% X / n d
 			   warn_kkt=FALSE,      # warn if KKT does not seem to be satisfied?
 			   max_iter=100,        # how many iterations for each optimization problem
                            kkt_tol=1.e-4,       # tolerance for the KKT conditions
+                           parameter_tol=1.e-4, # tolerance for relative convergence of parameter
 			   objective_tol=1.e-8  # tolerance for relative decrease in objective
                            ) {
 
@@ -363,6 +364,7 @@ debiasingMatrix = function(Xinfo,               # could be X or t(X) %*% X / n d
 			  warn_kkt=FALSE,
 			  max_iter=max_iter,
 			  kkt_tol=kkt_tol,
+			  parameter_tol=parameter_tol,
 			  objective_tol=objective_tol)
 
     if (warn_kkt && (!output$kkt_check)) {
@@ -393,6 +395,7 @@ debiasingRow = function (Xinfo,               # could be X or t(X) %*% X / n dep
 			 warn_kkt=FALSE,      # warn if KKT does not seem to be satisfied?
 			 max_iter=100,        # how many iterations for each optimization problem
                          kkt_tol=1.e-4,       # tolerance for the KKT conditions
+			 parameter_tol=1.e-4, # tolerance for relative convergence of parameter
 			 objective_tol=1.e-8  # tolerance for relative decrease in objective
                          ) {
 
@@ -433,6 +436,7 @@ debiasingRow = function (Xinfo,               # could be X or t(X) %*% X / n dep
                             nactive, 
                             kkt_tol, 
                             objective_tol, 
+			    parameter_tol,
                             max_active,
 			    FALSE,        # objective_stop
 			    FALSE,        # kkt_stop
@@ -451,6 +455,7 @@ debiasingRow = function (Xinfo,               # could be X or t(X) %*% X / n dep
                                  nactive, 
                                  kkt_tol, 
                                  objective_tol, 
+				 parameter_tol,
                                  max_active,
 				 FALSE,       # objective_stop
 				 FALSE,       # kkt_stop
diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
@@ -11,6 +11,7 @@ fit_randomized_lasso = function(X,
                                 noise_type=c('gaussian', 'laplace'),
                                 max_iter=100,        # how many iterations for each optimization problem
                                 kkt_tol=1.e-4,       # tolerance for the KKT conditions
+                                parameter_tol=1.e-8, # tolerance for relative convergence of parameter
                                 objective_tol=1.e-8, # tolerance for relative decrease in objective
                                 objective_stop=FALSE,
                                 kkt_stop=TRUE,
@@ -56,6 +57,7 @@ fit_randomized_lasso = function(X,
                            nactive, 
                            kkt_tol, 
                            objective_tol, 
+			   parameter_tol,
                            p,
 		           objective_stop,     # objective_stop
 			   kkt_stop,           # kkt_stop
diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp
@@ -15,6 +15,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma,
 		    Rcpp::IntegerVector nactive,
 		    double kkt_tol,
 		    double objective_tol,
+		    double parameter_tol,
 		    int max_active,
 		    int objective_stop,
 		    int kkt_stop,
@@ -52,6 +53,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma,
 		      maxiter,
 		      kkt_tol,
 		      objective_tol,
+		      parameter_tol,
 		      max_active,
 		      objective_stop,
 		      kkt_stop,
@@ -92,6 +94,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X,
 			 Rcpp::IntegerVector nactive,
 			 double kkt_tol,
 			 double objective_tol,
+			 double parameter_tol,
 			 int max_active,
 			 int objective_stop,
 			 int kkt_stop,
@@ -142,6 +145,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X,
 			maxiter,
 			kkt_tol,
 			objective_tol,
+			parameter_tol,
 			max_active,
 			objective_stop,
 			kkt_stop,
diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h
@@ -16,6 +16,7 @@ int solve_qp(double *nndef_ptr,          /* A non-negative definite matrix */
 	     int maxiter,                /* max number of iterations */
 	     double kkt_tol,             /* precision for checking KKT conditions */
 	     double objective_tol,       /* precision for checking relative decrease in objective value */
+	     double parameter_tol,       /* precision for checking relative convergence of parameter */
 	     int max_active,             /* Upper limit for size of active set -- otherwise break */ 
 	     int objective_stop,         /* Break based on convergence of objective value? */
              int kkt_stop,               /* Break based on KKT? */
@@ -44,6 +45,7 @@ int solve_wide(double *X_ptr,              /* Sqrt of non-neg def matrix -- X^TX
 	       int maxiter,                /* max number of iterations */
 	       double kkt_tol,             /* precision for checking KKT conditions */
 	       double objective_tol,       /* precision for checking relative decrease in objective value */
+	       double parameter_tol,       /* precision for checking relative convergence of parameter */
 	       int max_active,             /* Upper limit for size of active set -- otherwise break */ 
 	       int objective_stop,         /* Break based on convergence of objective value? */
 	       int kkt_stop,               /* Break based on KKT? */
diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c
@@ -273,6 +273,7 @@ int solve_qp(double *nndef_ptr,          /* A non-negative definite matrix */
 	     int maxiter,                /* max number of iterations */
 	     double kkt_tol,             /* precision for checking KKT conditions */
 	     double objective_tol,       /* precision for checking relative decrease in objective value */
+	     double parameter_tol,       /* precision for checking relative convergence of parameter */
 	     int max_active,             /* Upper limit for size of active set -- otherwise break */ 
 	     int objective_stop,         /* Break based on convergence of objective value? */
              int kkt_stop,               /* Break based on KKT? */
@@ -292,7 +293,6 @@ int solve_qp(double *nndef_ptr,          /* A non-negative definite matrix */
   double norm_diff = 1.;
   double norm_last = 1.;
   double delta;
-  double threshold = 1.e-2;
   double *theta_ptr, *theta_old_ptr;
 
   if (objective_stop) {
@@ -403,7 +403,7 @@ int solve_qp(double *nndef_ptr,          /* A non-negative definite matrix */
 	norm_diff = sqrt(norm_diff);
 	norm_last = sqrt(norm_last);
 	
-	if (norm_diff < threshold * norm_last) {
+	if (norm_diff < parameter_tol * norm_last) {
 	  break;
 	}
       }
diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c
@@ -4,7 +4,7 @@
 
 // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf
 
-// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2
+// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2
 // where l is `linear_func` below
 
 // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf
@@ -393,6 +393,7 @@ int solve_wide(double *X_ptr,              /* Sqrt of non-neg def matrix -- X^TX
 	       int maxiter,                /* max number of iterations */
 	       double kkt_tol,             /* precision for checking KKT conditions */
 	       double objective_tol,       /* precision for checking relative decrease in objective value */
+	       double parameter_tol,       /* precision for checking relative convergence of parameter */
 	       int max_active,             /* Upper limit for size of active set -- otherwise break */ 
 	       int objective_stop,         /* Break based on convergence of objective value? */
 	       int kkt_stop,               /* Break based on KKT? */
@@ -412,7 +413,6 @@ int solve_wide(double *X_ptr,              /* Sqrt of non-neg def matrix -- X^TX
   double norm_diff = 1.;
   double norm_last = 1.;
   double delta;
-  double threshold = 1.e-2;
   double *theta_ptr_tmp, *theta_old_ptr_tmp;
 
   if (objective_stop) {
@@ -552,7 +552,7 @@ int solve_wide(double *X_ptr,              /* Sqrt of non-neg def matrix -- X^TX
 	norm_diff = sqrt(norm_diff);
 	norm_last = sqrt(norm_last);
 	
-	if (norm_diff < threshold * norm_last) {
+	if (norm_diff < parameter_tol * norm_last) {
 	  break;
 	}
       }
diff --git a/tests/test_QP.R b/tests/test_QP.R
@@ -0,0 +1,15 @@
+library(selectiveInference)
+### Test: print the fit_randomized_lasso solution next to a glmnet solution for the same X, Y
+
+n = 100; p = 50
+
+X = matrix(rnorm(n * p), n, p)  # no seed is set, so the data differ on every run
+Y = rnorm(n)
+lam = 2
+
+soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln
+G = glmnet(X, Y, intercept=FALSE, standardize=FALSE)  # NOTE(review): no library(glmnet) here -- confirm glmnet is attached via selectiveInference
+soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1]  # NOTE(review): s is 1/n while lam is 2 -- confirm both solvers use the same penalty scale
+
+print(soln1)
+print(soln2)
diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R
@@ -1,7 +1,143 @@
 library(selectiveInference)
-source('oldcode.R')
 
-n = 500; p = 50
+
+## Approximates inverse covariance matrix theta
+InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) {
+  # Builds the p x p matrix M one row at a time: row i is the `optsol` returned by
+  # InverseLinftyOneRow(sigma, i, mu, ...). A supplied `mu` is used as is; when `mu`
+  # is NULL it is reset for every row and rescaled by `resol` over at most 9 tries.
+  isgiven <- if (is.null(mu)) 0 else 1;
+  
+  p <- nrow(sigma);
+  M <- matrix(0, p, p);
+  xperc = 0;
+  xp = max(1, round(p/10));  # progress step; kept >= 1 because i %% 0 is NaN and would break the `if` below
+  for (i in seq_len(p)) {    # seq_len() gives an empty loop when p is 0 (1:p would not)
+    if ((i %% xp)==0){
+      xperc = xperc+10;
+      if (verbose) {
+        print(paste(xperc,"% done",sep="")); }
+    }
+    if (isgiven==0){
+      mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2)));
+    }
+    mu.stop <- 0;
+    try.no <- 1;
+    incr <- 0;
+    while ((mu.stop != 1)&&(try.no<10)){
+      last.beta <- beta  # only read back in the try.no > 1 branch below
+      output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold)
+      beta <- output$optsol
+      iter <- output$iter
+      if (isgiven==1){
+        mu.stop <- 1
+      }
+      else{
+        if (try.no==1){
+          if (iter == (maxiter+1)){  # the row solver used all its iterations: search upwards in mu
+            incr <- 1;
+            mu <- mu*resol;
+          } else {                   # it stopped early: search downwards in mu
+            incr <- 0;
+            mu <- mu/resol;
+          }
+        }
+        if (try.no > 1){
+          if ((incr == 1)&&(iter == (maxiter+1))){
+            mu <- mu*resol;
+          }
+          if ((incr == 1)&&(iter < (maxiter+1))){
+            mu.stop <- 1;
+          }
+          if ((incr == 0)&&(iter < (maxiter+1))){
+            mu <- mu/resol;
+          }
+          if ((incr == 0)&&(iter == (maxiter+1))){  # went one step too far down: undo it, keep the previous solution
+            mu <- mu*resol;
+            beta <- last.beta;
+            mu.stop <- 1;
+          }
+        }
+      }
+      try.no <- try.no+1
+    }
+    M[i,] <- beta;
+  }
+  return(M)
+}
+## Row solver called by InverseLinfty: cyclic coordinate-wise soft-threshold updates of beta; returns list(optsol, iter).
+InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) {
+  p <- nrow(sigma);
+  rho <- max(abs(sigma[i,-i])) / sigma[i,i];  # largest |off-diagonal| entry of row i relative to its diagonal entry
+  mu0 <- rho/(1+rho);
+  beta <- rep(0,p);
+  # The commented-out block below is a disabled early return for mu >= mu0.
+  #if (mu >= mu0){
+  #  beta[i] <- (1-mu0)/sigma[i,i];
+  #  returnlist <- list("optsol" = beta, "iter" = 0);
+  #  return(returnlist);
+  #}
+  # Stopping-rule state: both norms start at 1 and are refreshed only at the checkpoints inside the loop.
+  diff.norm2 <- 1;
+  last.norm2 <- 1;
+  iter <- 1;
+  iter.old <- 1;
+  beta[i] <- (1-mu0)/sigma[i,i];  # starting point: only coordinate i is non-zero
+  beta.old <- beta;
+  sigma.tilde <- sigma;
+  diag(sigma.tilde) <- 0;         # sigma with its diagonal zeroed
+  vs <- -sigma.tilde%*%beta;
+  
+  while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){
+    # One sweep over all coordinates; vs is kept equal to -sigma.tilde %*% beta by the incremental update.
+    for (j in 1:p){
+      oldval <- beta[j];
+      v <- vs[j];
+      if (j==i)
+        v <- v+1;
+      beta[j] <- SoftThreshold(v,mu)/sigma[j,j];
+      if (oldval != beta[j]){
+        vs <- vs + (oldval-beta[j])*sigma.tilde[,j];
+      }
+    }
+    
+    iter <- iter + 1;
+    if (iter==2*iter.old){  # checkpoints at iter = 2, 4, 8, ...: compare beta with its value at the previous checkpoint
+      d <- beta - beta.old;
+      diff.norm2 <- sqrt(sum(d*d));
+      last.norm2 <-sqrt(sum(beta*beta));
+      iter.old <- iter;
+      beta.old <- beta;
+      #if (iter>10)
+      #  vs <- -sigma.tilde%*%beta;
+    }
+
+    # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu))
+
+  }
+  
+  returnlist <- list("optsol" = beta, "iter" = iter)  # iter is maxiter+1 when the iteration cap ended the loop
+  return(returnlist)
+}
+
+SoftThreshold <- function( x, lambda ) {
+  # Standard soft thresholding of x at level lambda:
+  # returns x - lambda if x > lambda, x + lambda if x < -lambda,
+  # and 0 otherwise.
+  if (x>lambda){
+    return (x-lambda);}
+  else {
+    if (x< (-lambda)){
+      return (x+lambda);}
+    else {
+      return (0); }
+  }
+}
+
+
+### Test
+
+n = 100; p = 50
 
 X = matrix(rnorm(n * p), n, p)
 S = t(X) %*% X / n
@@ -25,3 +161,35 @@ plot(B1[1,], C1[1,])
 plot(A1[1,], A2[1,])
 plot(B1[1,], B2[1,])
 plot(C1[1,], C2[1,])
+
+print(c('A', sum(A1[1,] == 0)))
+print(c('B', sum(B1[1,] == 0)))
+print(c('C', sum(C1[1,] == 0)))
+
+## Are our points feasible
+## Returns c(max(abs(S %*% soln - e_j)), mu), where e_j is the j-th unit vector, so the two numbers can be compared.
+feasibility = function(S, soln, j, mu) {
+     p = nrow(S)
+     E = rep(0, p)
+     E[j] = 1  # E is now the j-th unit vector e_j
+     G = S %*% soln - E
+     return(c(max(abs(G)), mu))
+}
+
+print(c('feasibility A', feasibility(S, A1[1,], 1, mu)))
+print(c('feasibility B', feasibility(S, B1[1,], 1, mu)))
+print(c('feasibility C', feasibility(S, C1[1,], 1, mu)))
+## Returns the entries of S %*% soln - e_j at the non-zero coordinates of soln, each times sign(soln), followed by mu.
+active_KKT = function(S, soln, j, mu) {
+     p = nrow(S)
+     E = rep(0, p)
+     E[j] = 1  # E is now the j-th unit vector e_j
+     G = S %*% soln - E
+     return(c(G[soln != 0] * sign(soln)[soln != 0], mu))
+}
+
+print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu)))
+print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu)))
+print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu)))
+
+