BF: everything was thrown into active set

jonathan-taylor · jonathan-taylor · commit d65e4373ca91 · 2017-08-18T19:44:43.000-07:00
diff --git a/Makefile b/Makefile
@@ -1,4 +1,14 @@
 Rcpp: 
 	- rm -f selectiveInference/src/RcppExports.cpp
 	- rm -f selectiveInference/R/RcppExports.R
-	Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
+	Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
+
+install: Rcpp # regenerate the Rcpp glue code before installing from source
+	R CMD INSTALL selectiveInference
+
+build: 
+	R CMD build selectiveInference
+
+check: Rcpp build
+	R CMD build selectiveInference
+	R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable
diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
@@ -297,7 +297,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold
 
     while ((mu.stop != 1)&&(try.no<10)){
       last.beta <- beta
-      #print(c("trying ", try.no))
+      print(c("#######################trying ", try.no))
       output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start
       beta <- output$soln
       iter <- output$iter
@@ -344,9 +344,11 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) {
   # It should be a list
   # with entries "soln", "gradient", "ever_active", "nactive"
 
+  p = nrow(Sigma)
+
   if (is.null(soln_result)) {
-     soln = rep(0, nrow(Sigma))
-     ever_active = rep(0, nrow(Sigma))
+     soln = rep(0, p)
+     ever_active = rep(0, p)
      ever_active[1] = i-1             # 0-based
      ever_active = as.integer(ever_active)
      nactive = as.integer(1)
@@ -363,7 +365,11 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) {
      linear_func = soln_result$linear_func
   }
 
-  result = find_one_row_debiasingM(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing
+  result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing
+  result2 = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing
+
+  print('close?')
+  print(c(sqrt(sum((result$soln-result2$soln)^2)/sum(result$soln^2)), sqrt(sum(result$soln^2)), result2$nactive))
 
   # Check feasibility
 
diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c
@@ -4,25 +4,23 @@
 
 // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf
 
-// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1
-// where l is `linear_func` below
+// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1
 
 // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf
 // Therefore we don't have to negate the answer to get theta.
 // Update one coordinate 
 
 double objective(double *Sigma_ptr,       /* A covariance matrix: X^TX/n */
-		 double *linear_func_ptr, /* Linear term in objective */
 		 int *ever_active_ptr,    /* Ever active set: 0-based */ 
-		 int *nactive_ptr,        /* Size of ever active set */
-		 int nrow,                /* how many rows in Sigma */
-		 double bound,            /* Lagrange multipler for \ell_1 */
-		 double *theta)           /* current value */
+		 int *nactive_ptr,    /* Size of ever active set */
+		 int nrow,            /* how many rows in Sigma */
+		 int row,             /* which row: 0-based */
+		 double bound,        /* Lagrange multiplier for \ell_1 */
+		 double *theta)       /* current value */
 {
   int irow, icol;
   double value = 0;
   double *Sigma_ptr_tmp = Sigma_ptr;
-  double *linear_func_ptr_tmp = linear_func_ptr;
   double *theta_row_ptr, *theta_col_ptr;
   int *active_row_ptr, *active_col_ptr;
   int active_row, active_col;
@@ -47,15 +45,12 @@ double objective(double *Sigma_ptr,       /* A covariance matrix: X^TX/n */
 
       value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr);
     }
-    value += bound * fabs((*theta_row_ptr)); // the \ell_1 term
-
-    // The linear term in the objective
+    value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term
+  }
 
-    linear_func_ptr_tmp = ((double *) linear_func_ptr + active_row);
-    value += (*linear_func_ptr_tmp) * (*theta_row_ptr); 
+  theta_row_ptr = ((double *) theta + row);
+  value -= (*theta_row_ptr); // the elementary basis vector term
 
-  }
-  
   return(value);
 }
 
@@ -71,14 +66,14 @@ int update_ever_active(int coord,
 
   for (iactive=0; iactive<nactive; iactive++) {
     ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive);
-    active_var = *ever_active_ptr_tmp;
+    active_var = (*ever_active_ptr_tmp);
     if (active_var == coord) {
       return(1);
     }
   }
-  
-  // If we haven't returned yet, this means the coord was not in 
-  // ever_active.
+
+  // If we have not returned yet, this variable
+  // was not in ever_active
 
   // Add it to the active set and increment the 
   // number of active variables
@@ -93,6 +88,7 @@ int update_ever_active(int coord,
 int check_KKT(double *theta,       /* current theta */
 	      double *gradient_ptr, /* Sigma times theta */
 	      int nrow,            /* how many rows in Sigma */
+	      int row,             /* which row: 0-based */
 	      double bound)        /* Lagrange multipler for \ell_1 */
 {
   // First check inactive
@@ -110,36 +106,39 @@ int check_KKT(double *theta,       /* current theta */
     // Compute this coordinate of the gradient
 
     gradient = *gradient_ptr_tmp;
+    if (row == irow) {
+      gradient -= 1;
+    }
 
     if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound
       if ((*theta_ptr > 0) &&  (fabs(gradient + bound) > tol * bound)) {
-	return(0);
+	fail += 1;
       }
       else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) {
-	return(0);
+	fail += 1;
       }
     }
     else {
       if (fabs(gradient) > (1. + tol) * bound) {
-	return(0);
+	fail += 1;
       }
     }
   }
 
-  return(1);
+  return(fail == 0);
 }
 
 double update_one_coord(double *Sigma_ptr,           /* A covariance matrix: X^TX/n */
-			double *linear_func_ptr,     /* Linear term in objective */
                         double *Sigma_diag_ptr,      /* Diagonal entries of Sigma */
-                        double *gradient_ptr,        /* Sigma times theta */
+                        double *gradient_ptr,     /* Sigma times theta */
 			int *ever_active_ptr,        /* Ever active set: 0-based */ 
-			int *nactive_ptr,            /* Size of ever active set */
-			int nrow,                    /* How many rows in Sigma */
-			double bound,                /* feasibility parameter */
-			double *theta,               /* current value */
-			int coord,                   /* which coordinate to update: 0-based */
-			int is_active)               /* Is this part of ever_active */     
+			int *nactive_ptr,        /* Size of ever active set */
+			int nrow,                /* How many rows in Sigma */
+			double bound,            /* feasibility parameter */
+			double *theta,           /* current value */
+			int row,                 /* which row: 0-based */
+			int coord,               /* which coordinate to update: 0-based */
+			int is_active)           /* Is this part of ever_active */     
 {
 
   double delta;
@@ -154,6 +153,8 @@ double update_one_coord(double *Sigma_ptr,           /* A covariance matrix: X^T
   double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord);
   double quadratic_term = *quadratic_ptr;
 
+  // int *ever_active_ptr_tmp;
+
   gradient_ptr_tmp = ((double *) gradient_ptr + coord);
   linear_term = *gradient_ptr_tmp;
 
@@ -163,9 +164,12 @@ double update_one_coord(double *Sigma_ptr,           /* A covariance matrix: X^T
   // The coord entry of gradient_ptr term has a diagonal term in it:
   // Sigma[coord, coord] * theta[coord]
   // This removes it. 
-
   linear_term -= quadratic_term * old_value;
 
+  if (row == coord) {
+    linear_term -= 1;
+  }
+
   // Now soft-threshold the coord entry of theta 
 
   // Objective is t \mapsto q/2 * t^2 + l * t + bound |t|
@@ -183,7 +187,7 @@ double update_one_coord(double *Sigma_ptr,           /* A covariance matrix: X^T
 
   // Add to active set if necessary
 
-  if (is_active == 0) {
+  if ((is_active == 0) && (value != 0)) {
     update_ever_active(coord, ever_active_ptr, nactive_ptr);
   }
 
@@ -211,31 +215,31 @@ double update_one_coord(double *Sigma_ptr,           /* A covariance matrix: X^T
 }
 
 int find_one_row_(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
- 		  double *linear_func_ptr,    /* Linear term in objective */
 		  double *Sigma_diag_ptr,     /* Diagonal entry of covariance matrix */
-		  double *gradient_ptr,       /* Sigma times theta */
+		  double *gradient_ptr,    /* Sigma times theta */
 		  int *ever_active_ptr,       /* Ever active set: 0-based */ 
-		  int *nactive_ptr,           /* Size of ever active set */
-		  int nrow,                   /* How many rows in Sigma */
-		  double bound,               /* feasibility parameter */
-		  double *theta,              /* current value */
-		  int maxiter)
+		  int *nactive_ptr,       /* Size of ever active set */
+		  int nrow,               /* How many rows in Sigma */
+		  double bound,           /* feasibility parameter */
+		  double *theta,          /* current value */
+		  int maxiter,            /* how many iterations */
+		  int row)                /* which coordinate to update: 0-based */
 {
 
   int iter = 0;
   int icoord = 0;
   int iactive = 0;
   int *active_ptr;
 
-/*   double old_value = objective(Sigma_ptr, */
-/* 			       linear_func_ptr, */
-/* 			       ever_active_ptr, */
-/* 			       nactive_ptr, */
-/* 			       nrow, */
-/* 			       bound, */
-/* 			       theta); */
+  double old_value = objective(Sigma_ptr,
+			       ever_active_ptr,
+			       nactive_ptr,
+			       nrow,
+			       row,
+			       bound,
+			       theta);
   double new_value; 
-  double tol=1.e-8;
+  double tol=1.e-5;
 
   for (iter=0; iter<maxiter; iter++) {
 
@@ -245,14 +249,14 @@ int find_one_row_(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
 
     for (iactive=0; iactive < *nactive_ptr; iactive++) {
       update_one_coord(Sigma_ptr,
-		       linear_func_ptr,
 		       Sigma_diag_ptr,
 		       gradient_ptr,
 		       ever_active_ptr,
 		       nactive_ptr,
 		       nrow,
 		       bound,
 		       theta,
+		       row,
 		       *active_ptr,
 		       1);
       active_ptr++;
@@ -263,6 +267,7 @@ int find_one_row_(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
     if (check_KKT(theta,
 		  gradient_ptr,
 		  nrow,
+		  row,
 		  bound) == 1) {
       break;
     }
@@ -272,14 +277,14 @@ int find_one_row_(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
     for (icoord=0; icoord<nrow; icoord++) {
 
       update_one_coord(Sigma_ptr,
-		       linear_func_ptr,
 		       Sigma_diag_ptr,
 		       gradient_ptr,
 		       ever_active_ptr,
 		       nactive_ptr,
 		       nrow,
 		       bound,
 		       theta,
+		       row,
 		       icoord,
 		       0);
     }
@@ -289,23 +294,24 @@ int find_one_row_(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
     if (check_KKT(theta,
 		  gradient_ptr,
 		  nrow,
+		  row,
 		  bound) == 1) {
       break;
     }
 					  
-/*     new_value = objective(Sigma_ptr, */
-/* 			  linear_func_ptr, */
-/* 			  ever_active_ptr, */
-/* 			  nactive_ptr, */
-/* 			  nrow, */
-/* 			  bound, */
-/* 			  theta); */
-
-/*     if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */
-/*       break; */
-/*     } */
-
-//    old_value = new_value;
+    new_value = objective(Sigma_ptr,
+			  ever_active_ptr,
+			  nactive_ptr,
+			  nrow,
+			  row,
+			  bound,
+			  theta);
+
+    if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) {
+      break;
+    }
+
+    old_value = new_value;
   }
   return(iter);
 }
diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c
@@ -1,4 +1,5 @@
 #include <math.h> // for fabs
+#include <stdio.h>
 
 // Find an approximate row of \hat{Sigma}^{-1}
 
@@ -81,6 +82,8 @@ int update_ever_active_qp(int coord,
   // Add it to the active set and increment the 
   // number of active variables
 
+  fprintf(stderr, "adding %d\n", coord);
+
   ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr);
   *ever_active_ptr_tmp = coord;
   *nactive_ptr += 1;
@@ -181,7 +184,7 @@ double update_one_coord_qp(double *Sigma_ptr,           /* A covariance matrix:
 
   // Add to active set if necessary
 
-  if (is_active == 0) {
+  if ((is_active == 0) && (value != 0)) {
     update_ever_active_qp(coord, ever_active_ptr, nactive_ptr);
   }
 
@@ -230,6 +233,8 @@ int solve_qp(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
   double old_value, new_value; 
   double tol=1.e-8;
 
+  fprintf(stderr, "%d nactive start\n", *nactive_ptr);
+
   if (check_objective) {
 
     old_value = objective_qp(Sigma_ptr,
@@ -245,6 +250,7 @@ int solve_qp(double *Sigma_ptr,          /* A covariance matrix: X^TX/n */
 
   for (iter=0; iter<maxiter; iter++) {
 
+    fprintf(stderr, "%d nactive loop \n", *nactive_ptr);
     // Update the active variables first
 
     active_ptr = (int *) ever_active_ptr;