diff --git a/yaglm/config/penalty.py b/yaglm/config/penalty.py
index 5b1cf71..e7007b1 100644
--- a/yaglm/config/penalty.py
+++ b/yaglm/config/penalty.py
@@ -31,7 +31,7 @@ class Ridge(WithPenSeqConfig):
         (Optional) Weights for each term in the penalty.
     """
     @autoassign
-    def __init__(self, pen_val=1, weights=None): pass
+    def __init__(self, pen_val=1, weights=None, targ_ubd=1): pass
 
     def get_pen_val_max(self, X, y, loss, fit_intercept=True,
                         sample_weight=None, init_data=None):
@@ -40,7 +40,7 @@ def get_pen_val_max(self, X, y, loss, fit_intercept=True,
                            weights=self.weights,
                            fit_intercept=fit_intercept,
                            sample_weight=sample_weight,
-                           targ_ubd=1,
+                           targ_ubd=self.targ_ubd,
                            norm_by_dim=True)
 
@@ -310,12 +310,12 @@ class ElasticNet(ElasticNetConfig):
     """
     Represents the ElasticNet penalty
 
-    pen_val * mix_val ||coef||_1 + 0.5 * pen_val * (1 - mix_val) * ||coef||_2^2
+    pen_val * mix_val ||coef||_1 + pen_val * (1 - mix_val) * ||coef||_2^2
 
     The Lasso may have weights (though not the ridge at this time) or may be flavored.
 
     We define the non-convex elastic net as
 
-    non-convex_{pen_val * mix_val} (coef) + 0.5 * pen_val * (1 - mix_val) * ||coef||_2^2
+    non-convex_{pen_val * mix_val} (coef) + pen_val * (1 - mix_val) * ||coef||_2^2
 
     Parameters
     ----------
diff --git a/yaglm/infer/Inferencer.py b/yaglm/infer/Inferencer.py
index 9155ccd..11c3249 100644
--- a/yaglm/infer/Inferencer.py
+++ b/yaglm/infer/Inferencer.py
@@ -6,7 +6,7 @@
 from yaglm.config.base import Config
 from yaglm.autoassign import autoassign
 from yaglm.config.penalty import Lasso
-from yaglm.infer.dof import est_dof_support
+from yaglm.infer.dof import est_dof_support, est_dof_enet
 from yaglm.utils import is_fitted
 from yaglm.config.loss import get_loss_config
 
@@ -136,6 +136,15 @@ def after_fit(self, estimator, X, y, sample_weight=None):
             else:
                 # we don't currently support estimating the DoF for this model
                 self.dof_ = None
+
+        elif self.dof == 'enet':
+
+            self.dof_ = est_dof_enet(coef=estimator.coef_,
+                                     pen_val=estimator.fit_penalty_.pen_val,
+                                     mix_val=estimator.fit_penalty_.mix_val,
+                                     X=X,
+                                     intercept=estimator.intercept_,
+                                     zero_tol=zero_tol)
 
         elif isinstance(self.dof, Number):
             # user provided DOF value
diff --git a/yaglm/infer/dof.py b/yaglm/infer/dof.py
index fddbb5c..1a6f0fe 100644
--- a/yaglm/infer/dof.py
+++ b/yaglm/infer/dof.py
@@ -49,3 +49,66 @@ def est_dof_support(coef, intercept=None, transform=None, zero_tol=1e-6):
     DoF = n_nonzero_coef + n_vals_intercept
 
     return DoF
+
+
+def est_dof_enet(coef, pen_val, mix_val, X, intercept=None, zero_tol=1e-6):
+    """
+    Estimates the degrees of freedom of the elastic net fit at a particular
+    penalty and mixing value.
+
+    ElasticNet penalty:
+
+    pen_val * mix_val ||coef||_1 + pen_val * (1 - mix_val) * ||coef||_2^2
+
+    Parameters
+    ----------
+    coef: array-like
+        The estimated coefficient.
+
+    pen_val: float
+        Current penalty value in the elastic net penalty.
+
+    mix_val: float
+        Current mixing value in the elastic net penalty.
+
+    X: (n, d)-array
+        The design matrix, excluding the intercept column.
+
+    intercept: None, float, array-like
+        (Optional) The estimated intercept.
+
+    zero_tol: float
+        Tolerance for declaring a small value equal to zero. This addresses numerical issues where some solvers may not return exact zeros.
+
+    Output
+    ------
+    DoF: float
+        The estimated number of degrees of freedom, given by
+        tr(X_A (X_A^T X_A + lambda_2 I)^{-1} X_A^T), where A is the estimated
+        support and lambda_2 = pen_val * (1 - mix_val).
+
+    References
+    ----------
+    Zou, H., Hastie, T. and Tibshirani, R., 2007.
+    On the “degrees of freedom” of the lasso. The Annals of Statistics, 35(5), pp.2173-2192.
+    """
+
+    # Get the estimated support from the fitted coefficient
+    if intercept is not None:
+        coef = np.concatenate([[intercept], coef])
+
+    support = (abs(coef) > zero_tol)
+
+    # tuning parameter attached to the ridge penalty
+    lambda_2 = pen_val * (1 - mix_val)
+
+    # Get the columns of the design matrix that correspond to the non-zero coef
+    if intercept is not None:
+        ones = np.ones((X.shape[0], 1))
+        X = np.concatenate([ones, X], axis=1)
+
+    X_A = X[:, support].copy()
+
+    xtx_li_inv = np.linalg.inv(X_A.T @ X_A + lambda_2 * np.identity(X_A.shape[1]))
+
+    DoF = np.trace(X_A @ xtx_li_inv @ X_A.T)
+
+    return DoF
diff --git a/yaglm/metrics/deviance_measures.py b/yaglm/metrics/deviance_measures.py
new file mode 100644
index 0000000..9a158d9
--- /dev/null
+++ b/yaglm/metrics/deviance_measures.py
@@ -0,0 +1,452 @@
+import numpy as np
+from scipy.special import betainc, gamma
+
+#########################################################
+# Deviance measures
+#########################################################
+
+
+def normal_deviance(y, z_hat, scale=1, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept_hat.
+
+    scale: float
+        The noise variance.
+
+    sample_weight: None, array-like shape (n_samples, )
+        (Optional) Sample weights.
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    # compute the scaled deviance for each sample
+    sample_devs = (y - z_hat) ** 2 / scale
+
+    # sum the sample deviances!
+    if sample_weight is None:
+        return sample_devs.sum()
+    else:
+        return sample_weight.T @ sample_devs
+
+
+def poisson_deviance(y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = y * np.log(y / z_hat) - (y - z_hat)
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+def binomial_deviance(n, y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    n: array-like shape (n_samples, )
+        The number of observations from which the response data were drawn.
+
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = y * np.log(y / z_hat) + (n - y) * np.log((n - y) / (n - z_hat))
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+def gamma_deviance(y, z_hat, sample_weight=None):
+    """
+    Computes the (possibly scaled) deviance for a gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+
+    sample_weight: None, array-like shape (n_samples, )
+        Optional sample weights
+
+    Output
+    ------
+    dev: float
+        The deviance value.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    d_i2 = -np.log(y / z_hat) + (y - z_hat) / z_hat
+
+    if sample_weight is None:
+        return 2 * sum(d_i2)
+    else:
+        return 2 * (sample_weight.T @ d_i2)
+
+
+#########################################################
+# Additional Residuals
+#########################################################
+
+
+def pearson_resid_normal(y, z_hat):
+    """
+    Computes the Pearson Residual for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (-\infty, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (-\infty, \infty).
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Normally distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    pearson_resid = (y - z_hat)
+
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_poisson(y, z_hat):
+    """
+    Computes the Pearson Residual for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in the natural numbers.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in the natural numbers.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Poisson distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    std_z_hat = z_hat**(1/2)
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_binomial(y, z_hat):
+    """
+    Computes the Pearson Residual for a Binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be between 0 and 1.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be between 0 and 1.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Binomially distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    # Var(y_i) = z_hat_i * (1 - z_hat_i) for an observation with mean z_hat_i
+    std_z_hat = np.sqrt(z_hat * (1 - z_hat))
+
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def pearson_resid_gamma(y, z_hat, alpha):
+    """
+    Computes the Pearson Residual for a Gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (0, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (0, \infty).
+
+    alpha: float
+        The shape parameter of the gamma distribution.
+
+    Output
+    ------
+    ||r_P||_2^2: float
+        The sum of squared Pearson Residuals for Gamma distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+
+    std_z_hat = z_hat / np.sqrt(alpha)
+    pearson_resid = (y - z_hat) / std_z_hat
+    return sum(pearson_resid**2)
+
+
+def anscombe_resid_normal(y, z_hat):
+    """
+    Computes the Anscombe Residual for a normal distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (-\infty, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (-\infty, \infty).
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Normally distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = y - z_hat
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_poisson(y, z_hat):
+    """
+    Computes the Anscombe Residual for a Poisson distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in the natural numbers.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in the natural numbers.
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Poisson distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = (3/2) * ((y**(2/3)) * z_hat**(-1/6) - z_hat**(1/2))
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_binomial(m, y, z_hat):
+    """
+    Computes the Anscombe Residual for a Binomial distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    m: int or array-like shape (n_samples, )
+        The number of trials for each observation.
+
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be between 0 and 1.
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be between 0 and 1.
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Binomially distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    a = 2/3
+    b = 2/3
+    gamma_ab = gamma(a + b)
+    gamma_a = gamma(a)
+    gamma_b = gamma(b)
+
+    # complete beta function B(a, b) = gamma(a) * gamma(b) / gamma(a + b)
+    reg_par = (gamma_a * gamma_b) / gamma_ab
+
+    # incomplete beta function B(t; a, b) = B(a, b) * betainc(a, b, t)
+    beta_y = reg_par * betainc(a, b, y)
+    beta_z_hat = reg_par * betainc(a, b, z_hat)
+
+    denom = (z_hat * (1 - z_hat))**(1 / 6)
+
+    anscomb_resid = np.sqrt(m) * (beta_y - beta_z_hat) / denom
+
+    return sum(anscomb_resid**2)
+
+
+def anscombe_resid_gamma(y, z_hat):
+    """
+    Computes the Anscombe Residual for a Gamma distribution. See Section 5.1 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    y: array-like shape (n_samples, )
+        The response data.
+        Assumed to be in (0, \infty).
+
+    z_hat: array-like shape (n_samples, )
+        The linear predictions z_hat = X @ beta_hat + intercept.
+        Assumed to be in (0, \infty).
+
+    Output
+    ------
+    ||r_A||_2^2: float
+        The sum of squared Anscombe Residuals for Gamma distributed observations.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    https://v8doc.sas.com/sashtml/insight/chap39/sect57.htm
+    """
+
+    anscomb_resid = 3 * ((y / z_hat)**(1/3) - 1)
+    return sum(anscomb_resid**2)
+
+
+#########################################################
+# Tuning parameter selection
+#########################################################
+
+
+def gcv_score(fit_measure, n_samples, df):
+    """
+    Computes the generalized cross-validation score. See Section 5.6 of (Fan et al., 2020).
+
+    Parameters
+    ----------
+    fit_measure: float
+        A measure of model fit, e.g. the deviance or the sum of squared Pearson or Anscombe Residuals.
+
+    n_samples: int
+        Number of samples.
+
+    df: float
+        An estimate of the effective degrees of freedom for the estimated coefficient.
+
+    Output
+    ------
+    gcv: float
+        Generalized cross-validation score.
+
+    References
+    ----------
+    Fan, J., Li, R., Zhang, C.H. and Zou, H., 2020. Statistical foundations of data science. Chapman and Hall/CRC.
+    """
+    return (1/n_samples) * fit_measure / (1 - (df/n_samples))**2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/yaglm/metrics/info_criteria.py b/yaglm/metrics/info_criteria.py
index 2c8a3b5..d7236b4 100644
--- a/yaglm/metrics/info_criteria.py
+++ b/yaglm/metrics/info_criteria.py
@@ -3,7 +3,7 @@
 from yaglm.metrics.base import Scorer
 from yaglm.autoassign import autoassign
-from yaglm.config.penalty import Lasso
+from yaglm.config.penalty import Lasso, ElasticNet
 from yaglm.utils import count_support
 from yaglm.extmath import log_binom
 
@@ -56,9 +56,9 @@ def __call__(self, estimator, X, y, sample_weight=None):
         """
 
         # formatting
-        if not isinstance(estimator.fit_penalty_, Lasso):
+        if not isinstance(estimator.fit_penalty_, (Lasso, ElasticNet)):
             raise NotImplementedError("Information criteria is currently only"
-                                      " supported for entrywise sparse penalties.")
+                                      " supported for Lasso and Elastic Net.")
 
         # compute data log-likelihood
         log_lik = estimator.sample_log_liks(X=X, y=y).sum()
diff --git a/yaglm/opt/algo/fista.py b/yaglm/opt/algo/fista.py
index af19a51..a51ea73 100644
--- a/yaglm/opt/algo/fista.py
+++ b/yaglm/opt/algo/fista.py
@@ -241,7 +241,7 @@ def backtracking_search(x, step, bt_iter_prev):
         # check x difference stopping criterion
         stop, diff_norm = check_no_change(current=value, prev=value_prev,
                                           tol=tol, rel_crit=rel_crit,
-                                          norm=stop_crit[-3:] # max or L2
+                                          norm=stop_crit[2:] # max or L2
                                           )
 
         if tracking_level >= 2:
diff --git a/yaglm/opt/algo/lla.py b/yaglm/opt/algo/lla.py
index f027d98..3cff5b9 100644
--- a/yaglm/opt/algo/lla.py
+++ b/yaglm/opt/algo/lla.py
@@ -184,7 +184,7 @@ def solve_lla(sub_prob, penalty_func,
         # check x difference stopping criterion
         stop, diff_norm = check_no_change(current=_current, prev=prev,
                                           tol=tol, rel_crit=rel_crit,
-                                          norm=stop_crit[-3:] # max or L2
+                                          norm=stop_crit[2:] # max or L2
                                           )
 
         if tracking_level >= 2:
diff --git a/yaglm/opt/nonconvex_utils.py b/yaglm/opt/nonconvex_utils.py
index b286768..8de5cab 100644
--- a/yaglm/opt/nonconvex_utils.py
+++ b/yaglm/opt/nonconvex_utils.py
@@ -66,7 +66,7 @@ def scad_grad_1d(x, pen_val, a=3.7):
 
     elif abs_x <= a * pen_val:
         return np.sign(x) * (a * pen_val - abs_x) / (a - 1)
-    
+
     else:
         return 0
diff --git a/yaglm/opt/stopping.py b/yaglm/opt/stopping.py
index a10db4d..ffad4fb 100644
--- a/yaglm/opt/stopping.py
+++ b/yaglm/opt/stopping.py
@@ -53,13 +53,13 @@ def check_no_change(current, prev, norm='max', tol=None,
         def f(x): return np.abs(x).max()
 
     elif norm == 'mad':
-        def f(x): np.abs(x).mean()
+        def f(x): return np.abs(x).mean()
 
     elif norm == 'L2':
-        def f(x): np.sqrt(((x) ** 2).sum())
+        def f(x): return np.sqrt(((x) ** 2).sum())
 
     elif norm == 'rmse':
-        def f(x): np.sqrt(((x) ** 2).mean())
+        def f(x): return np.sqrt(((x) ** 2).mean())
 
     else:
         raise ValueError("Bad input to norm: {}".format(norm))
diff --git a/yaglm/toy_data.py b/yaglm/toy_data.py
index 9bd252a..64ed8d5 100644
--- a/yaglm/toy_data.py
+++ b/yaglm/toy_data.py
@@ -3,6 +3,7 @@
 from scipy.special import expit
 from sklearn.utils.extmath import softmax
 from numbers import Number
+from scipy.stats import bernoulli
 
 
 # TODO: add signal to noise ratio for logistic, multinomial and poisson
@@ -13,6 +14,7 @@ def sample_sparse_lin_reg(n_samples=100, n_features=10,
                           cov='ar', corr=0.35,
                           beta_type=1,
+                          beta_random_state=None,
                           noise_std=1,
                           snr=None,
                           intercept=0,
@@ -72,7 +74,8 @@ def sample_sparse_lin_reg(n_samples=100, n_features=10,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           n_responses=n_responses, beta_type=beta_type)
+                           n_responses=n_responses, beta_type=beta_type,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -157,6 +160,7 @@ def infuse_outliers(y, prop_bad=.1, random_state=None):
 
 # TODO: add signal to noise ratio
 def sample_sparse_log_reg(n_samples=100, n_features=10, n_nonzero=5,
                           beta_type=1,
+                          beta_random_state=None,
                           cov='ar', corr=0.35,
                           coef_scale=1,
@@ -211,7 +215,7 @@ def sample_sparse_log_reg(n_samples=100, n_features=10, n_nonzero=5,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type)
+                           beta_type=beta_type, random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -238,6 +242,7 @@ def sample_sparse_multinomial(n_samples=100, n_features=10,
                               n_nonzero=5, n_classes=3,
                               beta_type=1,
+                              beta_random_state=None,
                               cov='ar', corr=0.35,
                               coef_scale=1,
@@ -292,7 +297,8 @@ def sample_sparse_multinomial(n_samples=100, n_features=10,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type, n_responses=n_classes)
+                           beta_type=beta_type, n_responses=n_classes,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -322,6 +328,7 @@ def sample_sparse_poisson_reg(n_samples=100, n_features=10, n_responses=1,
                               n_nonzero=5, beta_type=1,
+                              beta_random_state=None,
                               coef_scale=1,
                               cov='ar', corr=0.35,
@@ -379,7 +386,8 @@ def sample_sparse_poisson_reg(n_samples=100, n_features=10, n_responses=1,
 
     # set coefficient and covariance matrix
     coef = get_sparse_coef(n_features=n_features, n_nonzero=n_nonzero,
-                           beta_type=beta_type)
+                           beta_type=beta_type,
+                           random_state=beta_random_state)
 
     cov = get_cov(n_features=n_features, cov=cov, corr=corr)
@@ -468,7 +476,26 @@ def get_sparse_coef(n_features=10, n_nonzero=5, n_responses=1, beta_type=1,
     if neg_idx is not None:
         assert neg_idx < n_nonzero
 
-    if beta_type == 1:
+    if beta_type == 0:
+        # Beta value used in Model 1 of Fan et al. 2014
+        if n_features < 5:
+            raise ValueError('beta_type 0 requires at least 5 features')
+        coef = np.concatenate([[3, 1.5, 0, 0, 2],
+                               np.zeros(n_features - 5)])
+
+    elif beta_type == 23:
+        # Beta values used in Models 2 & 3 of Fan et al. 2014
+        if n_features < 10:
+            raise ValueError('beta_type 23 requires at least 10 features')
+        rng = np.random.RandomState(random_state)
+        coef = np.zeros(n_features)
+        idx = rng.choice(a=n_features, size=10, replace=False)
+        t = rng.uniform(1, 2, size=10)
+        # Rademacher signs: +1 or -1 with equal probability
+        s = 2 * bernoulli.rvs(0.5, size=10, random_state=rng) - 1
+        coef[idx] = t * s
+
+    elif beta_type == 1:
 
         # roughly equally spaced 1s
         coef = np.zeros(n_features)
@@ -505,6 +532,10 @@ def get_sparse_coef(n_features=10, n_nonzero=5, n_responses=1, beta_type=1,
         coef = np.concatenate([np.ones(n_nonzero), decay])
         support_idxs = np.arange(n_nonzero)
 
+
+    elif beta_type == 6:
+        # fully dense
+        coef = np.ones(n_features)
 
     # scale entries by a Laplace
     if laplace:
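
A minimal sketch of how the pieces added above can be composed: est_dof_enet supplies the effective degrees of freedom, normal_deviance the fit measure, and gcv_score the tuning criterion. The import paths are the ones introduced in this diff; the toy data and the stand-in coefficient (it would normally come from a fitted solver) are illustrative assumptions only.

import numpy as np

from yaglm.infer.dof import est_dof_enet
from yaglm.metrics.deviance_measures import normal_deviance, gcv_score

# toy data and a stand-in elastic net coefficient (normally returned by the solver)
rng = np.random.RandomState(0)
n, d = 100, 10
X = rng.normal(size=(n, d))
coef = np.array([3.0, 1.5, 0.0, 0.0, 2.0] + [0.0] * (d - 5))
y = X @ coef + rng.normal(size=n)

pen_val, mix_val = 0.1, 0.5

# effective degrees of freedom of the elastic net fit:
# tr(X_A (X_A^T X_A + lambda_2 I)^{-1} X_A^T) on the estimated support A
df = est_dof_enet(coef=coef, pen_val=pen_val, mix_val=mix_val, X=X, intercept=None)

# Gaussian deviance of the linear predictor, used as the fit measure
dev = normal_deviance(y=y, z_hat=X @ coef, scale=1)

# generalized cross-validation score for this (pen_val, mix_val) setting
gcv = gcv_score(fit_measure=dev, n_samples=n, df=df)
print(df, dev, gcv)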