|
| 1 | +# Special linear time order function, works only when x |
| 2 | +# is a scrambled vector of integers. |
| 3 | + |
| 4 | +Order <- function(x) { |
| 5 | + n = length(x) |
| 6 | + o = numeric(n) |
| 7 | + o[x] = Seq(1,n) |
| 8 | + return(o) |
| 9 | +} |
| 10 | + |
| 11 | +# Returns a sequence of integers from a to b if a <= b, |
| 12 | +# otherwise nothing. You have no idea how important this |
| 13 | +# function is... |
| 14 | + |
| 15 | +Seq <- function(a, b, ...) { |
| 16 | + if (a<=b) return(seq(a,b,...)) |
| 17 | + else return(numeric(0)) |
| 18 | +} |
| 19 | + |
| 20 | +# Returns the sign of x, with Sign(0)=1. |
| 21 | + |
| 22 | +Sign <- function(x) { |
| 23 | + return(-1+2*(x>=0)) |
| 24 | +} |
| 25 | + |
| 26 | +############################## |
| 27 | + |
| 28 | +# Centering and scaling convenience function |
| 29 | + |
| 30 | +standardize <- function(x, y, intercept, normalize) { |
| 31 | + x = as.matrix(x) |
| 32 | + y = as.numeric(y) |
| 33 | + n = nrow(x) |
| 34 | + p = ncol(x) |
| 35 | + |
| 36 | + if (intercept) { |
| 37 | + bx = colMeans(x) |
| 38 | + by = mean(y) |
| 39 | + x = scale(x,bx,FALSE) |
| 40 | + y = y-mean(y) |
| 41 | + } else { |
| 42 | + bx = rep(0,p) |
| 43 | + by = 0 |
| 44 | + } |
| 45 | + if (normalize) { |
| 46 | + sx = sqrt(colSums(x^2)) |
| 47 | + x = scale(x,FALSE,sx) |
| 48 | + } else { |
| 49 | + sx = rep(1,p) |
| 50 | + } |
| 51 | + |
| 52 | + return(list(x=x,y=y,bx=bx,by=by,sx=sx)) |
| 53 | +} |
| 54 | + |
| 55 | +############################## |
| 56 | + |
| 57 | +# Interpolation function to get coefficients |
| 58 | + |
| 59 | +coef.interpolate <- function(betas, s, knots, dec=TRUE) { |
| 60 | + # Sort the s values |
| 61 | + o = order(s,dec=dec) |
| 62 | + s = s[o] |
| 63 | + |
| 64 | + k = length(s) |
| 65 | + mat = matrix(rep(knots,each=k),nrow=k) |
| 66 | + if (dec) b = s >= mat |
| 67 | + else b = s <= mat |
| 68 | + blo = max.col(b,ties.method="first") |
| 69 | + bhi = pmax(blo-1,1) |
| 70 | + |
| 71 | + i = bhi==blo |
| 72 | + p = numeric(k) |
| 73 | + p[i] = 0 |
| 74 | + p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] |
| 75 | + |
| 76 | + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) |
| 77 | + colnames(beta) = as.character(round(s,3)) |
| 78 | + rownames(beta) = NULL |
| 79 | + |
| 80 | + # Return in original order |
| 81 | + o = order(o) |
| 82 | + return(beta[,o,drop=FALSE]) |
| 83 | +} |
| 84 | + |
| 85 | +############################## |
| 86 | + |
| 87 | +checkargs.xy <- function(x, y) { |
| 88 | + if (missing(x)) stop("x is missing") |
| 89 | + if (is.null(x) || !is.matrix(x)) stop("x must be a matrix") |
| 90 | + if (missing(y)) stop("y is missing") |
| 91 | + if (is.null(y) || !is.numeric(y)) stop("y must be numeric") |
| 92 | + if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]") |
| 93 | + if (checkcols(x)) stop("x cannot have duplicate columns") |
| 94 | + if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]") |
| 95 | + if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]") |
| 96 | +} |
| 97 | + |
| 98 | +checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, |
| 99 | + gridrange=NULL, gridpts=NULL, griddepth=NULL, |
| 100 | + mult=NULL, ntimes=NULL, |
| 101 | + beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, |
| 102 | + bh.q=NULL) { |
| 103 | + |
| 104 | + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") |
| 105 | + if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") |
| 106 | + if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") |
| 107 | + if (!is.null(k) && length(k) != 1) stop("k must be a single number") |
| 108 | + if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1") |
| 109 | + if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2])) |
| 110 | + stop("gridrange must be an interval of the form c(a,b) with a <= b") |
| 111 | + if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts))) |
| 112 | + stop("gridpts must be an integer >= 20") |
| 113 | + if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth))) |
| 114 | + stop("griddepth must be an integer <= 10") |
| 115 | + if (!is.null(mult) && mult < 0) stop("mult must be >= 0") |
| 116 | + if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes))) |
| 117 | + stop("ntimes must be an integer > 0") |
| 118 | + if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero") |
| 119 | + if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number") |
| 120 | + if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0") |
| 121 | + if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0") |
| 122 | + if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0") |
| 123 | +} |
| 124 | + |
| 125 | +# Make sure that no two columms of A are the same |
| 126 | +# (this works with probability one). |
| 127 | + |
| 128 | +checkcols <- function(A) { |
| 129 | + b = rnorm(nrow(A)) |
| 130 | + a = sort(t(A)%*%b) |
| 131 | + if (any(diff(a)==0)) return(TRUE) |
| 132 | + return(FALSE) |
| 133 | +} |
| 134 | + |
| 135 | +estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { |
| 136 | + checkargs.xy(x,rep(0,nrow(x))) |
| 137 | + if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma") |
| 138 | + cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize) |
| 139 | + lamhat=cvfit$lambda.min |
| 140 | + fit=glmnet(x,y,standardize=standardize) |
| 141 | + yhat=predict(fit,x,s=lamhat) |
| 142 | + nz=sum(predict(fit,s=lamhat, type="coef")!=0) |
| 143 | + sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1)) |
| 144 | + return(list(sigmahat=sigma, df=nz)) |
| 145 | +} |
| 146 | + |
| 147 | +# Update the QR factorization, after a column has been |
| 148 | +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. |
| 149 | + |
| 150 | +updateQR <- function(Q1,Q2,R,col) { |
| 151 | + m = nrow(Q1) |
| 152 | + n = ncol(Q1) |
| 153 | + k = ncol(Q2) |
| 154 | + |
| 155 | + a = .C("update1", |
| 156 | + Q2=as.double(Q2), |
| 157 | + w=as.double(t(Q2)%*%col), |
| 158 | + m=as.integer(m), |
| 159 | + k=as.integer(k), |
| 160 | + dup=FALSE, |
| 161 | + package="selectiveInference") |
| 162 | + |
| 163 | + Q2 = matrix(a$Q2,nrow=m) |
| 164 | + w = c(t(Q1)%*%col,a$w) |
| 165 | + |
| 166 | + # Re-structure: delete a column from Q2, add one to |
| 167 | + # Q1, and expand R |
| 168 | + Q1 = cbind(Q1,Q2[,1]) |
| 169 | + Q2 = Q2[,-1,drop=FALSE] |
| 170 | + R = rbind(R,rep(0,n)) |
| 171 | + R = cbind(R,w[Seq(1,n+1)]) |
| 172 | + |
| 173 | + return(list(Q1=Q1,Q2=Q2,R=R)) |
| 174 | +} |
| 175 | + |
| 176 | +# Moore-Penrose pseudo inverse for symmetric matrices |
| 177 | + |
| 178 | +pinv <- function(A, tol=.Machine$double.eps) { |
| 179 | + e = eigen(A) |
| 180 | + v = Re(e$vec) |
| 181 | + d = Re(e$val) |
| 182 | + d[d > tol] = 1/d[d > tol] |
| 183 | + d[d < tol] = 0 |
| 184 | + if (length(d)==1) return(v*d*v) |
| 185 | + else return(v %*% diag(d) %*% t(v)) |
| 186 | +} |
0 commit comments