diff --git a/yaglm/config/penalty.py b/yaglm/config/penalty.py
index 5b1cf71..e7007b1 100644
--- a/yaglm/config/penalty.py
+++ b/yaglm/config/penalty.py
@@ -31,7 +31,7 @@ class Ridge(WithPenSeqConfig):
         (Optional) Weights for each term in the penalty.
     """
     @autoassign
-    def __init__(self, pen_val=1, weights=None): pass
+    def __init__(self, pen_val=1, weights=None, targ_ubd=1): pass
 
     def get_pen_val_max(self, X, y, loss, fit_intercept=True,
                         sample_weight=None, init_data=None):
@@ -40,7 +40,7 @@ def get_pen_val_max(self, X, y, loss, fit_intercept=True,
                            weights=self.weights,
                            fit_intercept=fit_intercept,
                            sample_weight=sample_weight,
-                           targ_ubd=1,
+                           targ_ubd=self.targ_ubd,
                            norm_by_dim=True)
 
@@ -310,12 +310,12 @@ class ElasticNet(ElasticNetConfig):
     """
     Represents the ElasticNet penalty
 
-    pen_val * mix_val ||coef||_1 + 0.5 * pen_val * (1 - mix_val) * ||coef||_2^2
+    pen_val * mix_val ||coef||_1 + pen_val * (1 - mix_val) * ||coef||_2^2
 
     The Lasso may have weights (though not the ridge at this time) or may be flavored.
 
     We define the non-convex elastic net as
 
-    non-convex_{pen_val * mix_val} (coef) + 0.5 * pen_val * (1 - mix_val) * ||coef||_2^2
+    non-convex_{pen_val * mix_val} (coef) + pen_val * (1 - mix_val) * ||coef||_2^2
 
     Parameters
     ----------
diff --git a/yaglm/infer/Inferencer.py b/yaglm/infer/Inferencer.py
index 9155ccd..11c3249 100644
--- a/yaglm/infer/Inferencer.py
+++ b/yaglm/infer/Inferencer.py
@@ -6,7 +6,7 @@
 from yaglm.config.base import Config
 from yaglm.autoassign import autoassign
 from yaglm.config.penalty import Lasso
-from yaglm.infer.dof import est_dof_support
+from yaglm.infer.dof import est_dof_support, est_dof_enet
 from yaglm.utils import is_fitted
 from yaglm.config.loss import get_loss_config
 
@@ -136,6 +136,15 @@ def after_fit(self, estimator, X, y, sample_weight=None):
             else:
                 # we don't currently support estimating the DoF for this model
                 self.dof_ = None
+
+        elif self.dof == 'enet':
+
+            self.dof_ = est_dof_enet(coef=estimator.coef_,
+                                     pen_val=estimator.fit_penalty_.pen_val,
+                                     mix_val=estimator.fit_penalty_.mix_val,
+                                     X=X,
+                                     intercept=estimator.intercept_,
+                                     zero_tol=zero_tol)
 
         elif isinstance(self.dof, Number):
             # user provided DOF value
diff --git a/yaglm/infer/dof.py b/yaglm/infer/dof.py
index fddbb5c..1a6f0fe 100644
--- a/yaglm/infer/dof.py
+++ b/yaglm/infer/dof.py
@@ -49,3 +49,66 @@ def est_dof_support(coef, intercept=None, transform=None, zero_tol=1e-6):
     DoF = n_nonzero_coef + n_vals_intercept
 
     return DoF
+
+
+def est_dof_enet(coef, pen_val, mix_val, X, intercept=None, zero_tol=1e-6):
+    """
+    Estimates the degrees of freedom of the elastic net fit at a particular
+    penalty and mixing value.
+
+    ElasticNet penalty:
+
+    pen_val * mix_val ||coef||_1 + pen_val * (1 - mix_val) * ||coef||_2^2
+
+    Parameters
+    ----------
+    coef: array-like
+        The estimated coefficient.
+
+    pen_val: float
+        Current penalty value in the elastic net penalty.
+
+    mix_val: float
+        Current mixing value in the elastic net penalty.
+
+    X: (n, d)-array
+        The design matrix, excluding the intercept column.
+
+    intercept: None, float, array-like
+        (Optional) The estimated intercept.
+
+    zero_tol: float
+        Tolerance for declaring a small value equal to zero. This addresses numerical issues where some solvers may not return exact zeros.
+
+    Output
+    ------
+    DoF: float
+        The estimated number of degrees of freedom, given by
+        tr(X_A (X_A^T X_A + lambda_2 I)^{-1} X_A^T), where A is the estimated
+        support and lambda_2 = pen_val * (1 - mix_val).
+
+    References
+    ----------
+    Zou, H., Hastie, T. and Tibshirani, R., 2007.
+    On the “degrees of freedom” of the lasso. The Annals of Statistics, 35(5), pp.2173-2192.
+    """
+
+    # Get the estimated support from the fitted coefficient
+    if intercept is not None:
+        coef = np.concatenate([[intercept], coef])
+
+    support = (abs(coef) > zero_tol)
+
+    # tuning parameter attached to the ridge penalty
+    lambda_2 = pen_val * (1 - mix_val)
+
+    # Get the columns of the design matrix that correspond to the non-zero coef
+    if intercept is not None:
+        ones = np.ones((X.shape[0], 1))
+        X = np.concatenate([ones, X], axis=1)
+
+    X_A = X[:, support].copy()
+
+    xtx_li_inv = np.linalg.inv(X_A.T @ X_A + lambda_2 * np.identity(X_A.shape[1]))
+
+    DoF = np.trace(X_A @ xtx_li_inv @ X_A.T)
+
+    return DoF
diff --git a/yaglm/metrics/deviance_measures.py b/yaglm/metrics/deviance_measures.py
new file mode 100644
index 0000000..9a158d9
--- /dev/null
+++ b/yaglm/metrics/deviance_measures.py
@@ -0,0 +1,452 @@
+import numpy as np
+from scipy.special import betainc, gamma
+
+#########################################################
+# Deviance measures
+#########################################################
+
+
+def normal_deviance(y, z_hat, scale=1, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept_hat.
+
+    scale: float
+        The noise variance.
+
+    sample_weight: None, array-like shape (n_samples, )
+        (Optional) Sample weights.
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    # compute the scaled deviance for each sample
+    sample_devs = (y - z_hat) ** 2 / scale
+
+    # sum the sample deviances!
+    if sample_weight is None:
+        return sample_devs.sum()
+    else:
+        return sample_weight.T @ sample_devs
+
+
+def poisson_deviance(y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = y * np.log(y / z_hat) - (y - z_hat)
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+def binomial_deviance(n, y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    n: array-like shape (n_samples, )
+        The number of observations from which the response data were drawn.
+
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = y * np.log(y / z_hat) + (n - y) * np.log((n - y) / (n - z_hat))
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+def gamma_deviance(y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = -np.log(y / z_hat) + (y - z_hat) / z_hat
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+#########################################################
+# Additional Residuals
+#########################################################
+
+
+def pearson_resid_normal(y, z_hat):
+    """
+    Computes the Pearson Residual for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (-\infty, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (-\infty, \infty).
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Normally distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    pearson_resid = (y - z_hat)
+
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_poisson(y, z_hat):
+    """
+    Computes the Pearson Residual for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in the natural numbers.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in the natural numbers.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Poisson distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    std_z_hat = z_hat**(1/2)
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_binomial(y, z_hat):
+    """
+    Computes the Pearson Residual for a Binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be between 0 and 1.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be between 0 and 1.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Binomially distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    # Var(y_i) = z_hat_i * (1 - z_hat_i) for an observation with mean z_hat_i
+    std_z_hat = np.sqrt(z_hat * (1 - z_hat))
+
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_gamma(y, z_hat, alpha):
+    """
+    Computes the Pearson Residual for a Gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (0, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (0, \infty).
+
+    alpha: float
+        The shape parameter of the gamma distribution.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Gamma distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    std_z_hat = z_hat / np.sqrt(alpha)
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def anscombe_resid_normal(y, z_hat):
+    """
+    Computes the Anscombe Residual for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (-\infty, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (-\infty, \infty).
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Normally distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = y - z_hat
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_poisson(y, z_hat):
+    """
+    Computes the Anscombe Residual for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in the natural numbers.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in the natural numbers.
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Poisson distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = (3/2) * ((y**(2/3)) * z_hat**(-1/6) - z_hat**(1/2))
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_binomial(m, y, z_hat):
+    """
+    Computes the Anscombe Residual for a Binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    m: int or array-like shape (n_samples, )
+        The number of trials for each observation.
+
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be between 0 and 1.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be between 0 and 1.
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Binomially distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    a = 2/3
+    b = 2/3
+    gamma_ab = gamma(a + b)
+    gamma_a = gamma(a)
+    gamma_b = gamma(b)
+
+    # complete beta function B(a, b) = gamma(a) * gamma(b) / gamma(a + b)
+    reg_par = (gamma_a * gamma_b) / gamma_ab
+
+    # incomplete beta function B(t; a, b) = B(a, b) * betainc(a, b, t)
+    beta_y = reg_par * betainc(a, b, y)
+    beta_z_hat = reg_par * betainc(a, b, z_hat)
+
+    denom = (z_hat * (1 - z_hat))**(1 / 6)
+
+    anscomb_resid = np.sqrt(m) * (beta_y - beta_z_hat) / denom
+
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_gamma(y, z_hat):
+    """
+    Computes the Anscombe Residual for a Gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (0, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (0, \infty).
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Gamma distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = 3 * ((y / z_hat)**(1/3) - 1)
+    return sum(anscomb_resid**2)
+
+
+#########################################################
+# Tuning parameter selection
+#########################################################
+
+
+def gcv_score(fit_measure, n_samples, df):
+    """
+    Computes the generalized cross-validation score. See Section 5.6 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    fit_measure: float
+        A measure of model fit, e.g. the deviance or the sum of squared Pearson or Anscombe Residuals.
+
+    n_samples: int
+        Number of samples.
+
+    df: float
+        An estimate of the effective degrees of freedom for the estimated coefficient.
+
+    Output
+    ------
+    gcv: float
+        Generalized cross-validation score.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+    return (1/n_samples) * fit_measure / (1 - (df/n_samples))**2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/yaglm/metrics/info_criteria.py b/yaglm/metrics/info_criteria.py
index 2c8a3b5..d7236b4 100644
--- a/yaglm/metrics/info_criteria.py
+++ b/yaglm/metrics/info_criteria.py
@@ -3,7 +3,7 @@
 from yaglm.metrics.base import Scorer
 from yaglm.autoassign import autoassign
-from yaglm.config.penalty import Lasso
+from yaglm.config.penalty import Lasso, ElasticNet
 from yaglm.utils import count_support
 from yaglm.extmath import log_binom
 
@@ -56,9 +56,9 @@ def __call__(self, estimator, X, y, sample_weight=None):
         """
 
         # formatting
-        if not isinstance(estimator.fit_penalty_, Lasso):
+        if not isinstance(estimator.fit_penalty_, (Lasso, ElasticNet)):
             raise NotImplementedError("Information criteria is currently only"
-                                      " supported for entrywise sparse penalties.")
+                                      " supported for Lasso and Elastic Net.")
 
         # compute data log-likelihood
         log_lik = estimator.sample_log_liks(X=X, y=y).sum()
diff --git a/yaglm/opt/algo/fista.py b/yaglm/opt/algo/fista.py
index af19a51..a51ea73 100644
--- a/yaglm/opt/algo/fista.py
+++ b/yaglm/opt/algo/fista.py
@@ -241,7 +241,7 @@ def backtracking_search(x, step, bt_iter_prev):
         # check x difference stopping criterion
         stop, diff_norm = check_no_change(current=value, prev=value_prev,
                                           tol=tol, rel_crit=rel_crit,
-                                          norm=stop_crit[-3:] # max or L2
+                                          norm=stop_crit[2:] # max or L2
                                           )
 
         if tracking_level >= 2:
diff --git a/yaglm/opt/algo/lla.py b/yaglm/opt/algo/lla.py
index f027d98..3cff5b9 100644
--- a/yaglm/opt/algo/lla.py
+++ b/yaglm/opt/algo/lla.py
@@ -184,7 +184,7 @@ def solve_lla(sub_prob, penalty_func,
         # check x difference stopping criterion
         stop, diff_norm = check_no_change(current=_current, prev=prev,
                                           tol=tol, rel_crit=rel_crit,
-                                          norm=stop_crit[-3:] # max or L2
+                                          norm=stop_crit[2:] # max or L2
                                           )
 
         if tracking_level >= 2:
diff --git a/yaglm/opt/nonconvex_utils.py b/yaglm/opt/nonconvex_utils.py
index b286768..8de5cab 100644
--- a/yaglm/opt/nonconvex_utils.py
+++ b/yaglm/opt/nonconvex_utils.py
@@ -66,7 +66,7 @@ def scad_grad_1d(x, pen_val, a=3.7):
 
     elif abs_x <= a * pen_val:
         return np.sign(x) * (a * pen_val - abs_x) / (a - 1)
-    
+
     else:
         return 0
diff --git a/yaglm/opt/stopping.py b/yaglm/opt/stopping.py
index a10db4d..ffad4fb 100644
--- a/yaglm/opt/stopping.py
+++ b/yaglm/opt/stopping.py
@@ -53,13 +53,13 @@ def check_no_change(current, prev, norm='max', tol=None,
         def f(x): return np.abs(x).max()
 
     elif norm == 'mad':
-        def f(x): np.abs(x).mean()
+        def f(x): return np.abs(x).mean()
 
     elif norm == 'L2':
-        def f(x): np.sqrt(((x) ** 2).sum())
+        def f(x): return np.sqrt(((x) ** 2).sum())
 
     elif norm == 'rmse':
-        def f(x): np.sqrt(((x) ** 2).mean())
+        def f(x): return np.sqrt(((x) ** 2).mean())
 
     else:
         raise ValueError("Bad input to norm: {}".format(norm))
diff --git a/yaglm/toy_data.py b/yaglm/toy_data.py
index 9bd252a..64ed8d5 100644
--- a/yaglm/toy_data.py
+++ b/yaglm/toy_data.py
@@ -3,6 +3,7 @@
 from scipy.special import expit
 from sklearn.utils.extmath import softmax
 from numbers import Number
+from scipy.stats import bernoulli
 
 
 # TODO: add signal to noise ratio for logistic, multinomial and poisson
@@ -13,6 +14,7 @@ def sample_sparse_lin_reg(n_samples=100, n_features=10,
                           cov='ar', corr=0.35,
                           beta_type=1,
+                          beta_random_state=None,
                           noise_std=1,
                           snr=None,
                           intercept=0,
@@ -72,7 +74,8 @@ def sample_sparse_lin_reg(n_samples=100, n_features=10,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           n_responses=n_responses, beta_type=beta_type)
+                           n_responses=n_responses, beta_type=beta_type,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -157,6 +160,7 @@ def infuse_outliers(y, prop_bad=.1, random_state=None):
 
 # TODO: add signal to noise ratio
 def sample_sparse_log_reg(n_samples=100, n_features=10, n_nonzero=5,
                           beta_type=1,
+                          beta_random_state=None,
                           cov='ar', corr=0.35,
                           coef_scale=1,
@@ -211,7 +215,7 @@ def sample_sparse_log_reg(n_samples=100, n_features=10, n_nonzero=5,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type)
+                           beta_type=beta_type, random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -238,6 +242,7 @@ def sample_sparse_multinomial(n_samples=100, n_features=10,
                               n_nonzero=5, n_classes=3,
                               beta_type=1,
+                              beta_random_state=None,
                               cov='ar', corr=0.35,
                               coef_scale=1,
@@ -292,7 +297,8 @@ def sample_sparse_multinomial(n_samples=100, n_features=10,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type, n_responses=n_classes)
+                           beta_type=beta_type, n_responses=n_classes,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -322,6 +328,7 @@ def sample_sparse_poisson_reg(n_samples=100, n_features=10, n_responses=1,
                               n_nonzero=5, beta_type=1,
+                              beta_random_state=None,
                               coef_scale=1,
                               cov='ar', corr=0.35,
@@ -379,7 +386,8 @@ def sample_sparse_poisson_reg(n_samples=100, n_features=10, n_responses=1,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type)
+                           beta_type=beta_type,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -468,7 +476,26 @@ def get_sparse_coef(n_features=10, n_nonzero=5, n_responses=1, beta_type=1,
     if neg_idx is not None:
         assert neg_idx < n_nonzero
 
-    if beta_type == 1:
+    if beta_type == 0:
+        # Beta value used in Model 1 of Fan et al. 2014
+        if n_features < 5:
+            raise ValueError('beta_type 0 requires at least 5 features')
+        coef = np.concatenate([[3, 1.5, 0, 0, 2],
+                               np.zeros(n_features - 5)])
+
+    elif beta_type == 23:
+        # Beta values used in Models 2 & 3 of Fan et al. 2014
+        if n_features < 10:
+            raise ValueError('beta_type 23 requires at least 10 features')
+        rng = np.random.RandomState(random_state)
+        coef = np.zeros(n_features)
+        idx = rng.choice(a=n_features, size=10, replace=False)
+        t = rng.uniform(1, 2, size=10)
+        # Rademacher signs: +1 or -1 with equal probability
+        s = 2 * bernoulli.rvs(0.5, size=10, random_state=rng) - 1
+        coef[idx] = t * s
+
+    elif beta_type == 1:
 
         # roughly equally spaced 1s
         coef = np.zeros(n_features)
@@ -505,6 +532,10 @@ def get_sparse_coef(n_features=10, n_nonzero=5, n_responses=1, beta_type=1,
         coef = np.concatenate([np.ones(n_nonzero), decay])
         support_idxs = np.arange(n_nonzero)
 
+
+    elif beta_type == 6:
+        # fully dense
+        coef = np.ones(n_features)
 
     # scale entries by a Laplace
     if laplace:
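
A minimal sketch of how the pieces added above can be composed: est_dof_enet supplies the effective degrees of freedom, normal_deviance the fit measure, and gcv_score the tuning criterion. The import paths are the ones introduced in this diff; the toy data and the stand-in coefficient (it would normally come from a fitted solver) are illustrative assumptions only.

import numpy as np

from yaglm.infer.dof import est_dof_enet
from yaglm.metrics.deviance_measures import normal_deviance, gcv_score

# toy data and a stand-in elastic net coefficient (normally returned by the solver)
rng = np.random.RandomState(0)
n, d = 100, 10
X = rng.normal(size=(n, d))
coef = np.array([3.0, 1.5, 0.0, 0.0, 2.0] + [0.0] * (d - 5))
y = X @ coef + rng.normal(size=n)

pen_val, mix_val = 0.1, 0.5

# effective degrees of freedom of the elastic net fit:
# tr(X_A (X_A^T X_A + lambda_2 I)^{-1} X_A^T) on the estimated support A
df = est_dof_enet(coef=coef, pen_val=pen_val, mix_val=mix_val, X=X, intercept=None)

# Gaussian deviance of the linear predictor, used as the fit measure
dev = normal_deviance(y=y, z_hat=X @ coef, scale=1)

# generalized cross-validation score for this (pen_val, mix_val) setting
gcv = gcv_score(fit_measure=dev, n_samples=n, df=df)
print(df, dev, gcv)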