diff --git a/R/ftrlprox.default.r b/R/ftrlprox.default.r
index 10a7bf5..6be84e9 100644
--- a/R/ftrlprox.default.r
+++ b/R/ftrlprox.default.r
@@ -11,7 +11,7 @@
 #' @param alpha mixing parameter, alpha=0 corresponds to L2 regularization and alpha=1 to L1.
 #' @param a learning rate parameter.
 #' @param b learning rate parameter controlling decay, defaults to 1.
-#' @param num_epochs number of times we should traverse over the traiing set, defaults to 1.
+#' @param epochs number of times we should traverse over the training set, defaults to 1.
 #' @param save_loss is to save the loss function during training.
 #' @param ... additional args
 #' @return ftrlprox model object
@@ -22,7 +22,7 @@
 #' @importFrom methods as
 #' @export
 ##------------------------------------------------------------------------------
-ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, num_epochs=1,
+ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, epochs=1,
                              save_loss=F, ...) {
   if (nrow(x) != length(y))
     stop(sprintf("Input has differing number of rows, nrow(x)=%d, length(y)=%d",
@@ -45,7 +45,7 @@ ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, num_epochs=1,
     x <- as(x,"dgCMatrix")
   }
 
-  J = if (save_loss) numeric(nrow(x)*num_epochs) else numeric(0)
+  J = if (save_loss) numeric(nrow(x)) else numeric(0)
 
   out <- if(is_sparse) {
     .C("splognet_ftrlprox",
@@ -59,7 +59,6 @@ ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, num_epochs=1,
        z=double(ncol(x)),
        nn=double(ncol(x)),
        J=J,
-       num_epochs=as.integer(num_epochs),
        a=as.double(a),
        b=as.double(b),
        lambda1=as.double(alpha*lambda),
@@ -75,7 +74,6 @@ ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, num_epochs=1,
        z=double(ncol(x)),
       nn=double(ncol(x)),
       J=J,
-      num_epochs=as.integer(num_epochs),
       a=as.double(a),
       b=as.double(b),
       lambda1=as.double(alpha*lambda),
@@ -103,6 +101,14 @@ ftrlprox.default <- function(x, y, lambda, alpha, a, b=1, num_epochs=1,
   out$levels <- levels(y)
 
   class(out) <- "ftrlprox"
+
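+  # The C routines now make a single pass over the data; any remaining
+  # epochs are handled here in R by reshuffling and calling update().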
+  while (epochs > 1) {
+    idx <- sample(1:nrow(x))
+    out <- update(out, x[idx, ], y[idx], save_loss=save_loss)
+    epochs <- epochs - 1
+  }
 
   out
 }
diff --git a/R/ftrlprox.formula.r b/R/ftrlprox.formula.r
index f1b96d6..3173853 100644
--- a/R/ftrlprox.formula.r
+++ b/R/ftrlprox.formula.r
@@ -11,7 +11,7 @@
 #' @param alpha mixing parameter, alpha=0 corresponds to L2 regularization and alpha=1 to L1.
 #' @param a learning rate parameter
 #' @param b learning rate parameter controlling decay, defaults to 1.
-#' @param num_epochs number of times we should traverse over the traiing set, defaults to 1.
+#' @param epochs number of times we should traverse over the training set, defaults to 1.
 #' @param save_loss is to save the loss function during training.
 #' @param ... additional args
 #' @return ftrlprox model object
@@ -30,13 +30,13 @@
 #'                a = 0.3, lambda = 5.0, alpha = 1.0)
 #' print(mdl)
 ##------------------------------------------------------------------------------
-ftrlprox.formula <- function(formula, data, lambda, alpha, a, b=1, num_epochs=1, save_loss=F, ...) {
+ftrlprox.formula <- function(formula, data, lambda, alpha, a, b=1, epochs=1, save_loss=F, ...) {
 
   X <- model.matrix(formula, data)
   y <- data[[all.vars(formula[[2]])]]
 
   ftrlprox(X,y,a=a,b=b, lambda=lambda, alpha=alpha,
-           num_epochs=num_epochs, save_loss=save_loss)
+           epochs=epochs, save_loss=save_loss)
 }
diff --git a/R/initialize.ftrlprox.r b/R/initialize.ftrlprox.r
index a321af5..60d6cb2 100644
--- a/R/initialize.ftrlprox.r
+++ b/R/initialize.ftrlprox.r
@@ -30,7 +30,7 @@ initialize.ftrlprox <- function(theta, levels, lambda, alpha, a, b=1, save_loss=
              z = numeric(n),
              nn = numeric(n),
              J = numeric(0),
-             num_epochs = 1,
+             epochs = 1,
              a = a,
              b = b,
              lambda = lambda,
diff --git a/R/update.ftrlprox.r b/R/update.ftrlprox.r
index d050de3..560a70b 100644
--- a/R/update.ftrlprox.r
+++ b/R/update.ftrlprox.r
@@ -8,7 +8,7 @@
 #' @param object the model object
 #' @param newX new feature vectors. This needs to be the same features as used in previous training rounds for this object.
 #' @param newY new observations
-#' @param num_epochs number of times we should traverse over the training data, defaults to 1.
+#' @param epochs number of times we should traverse over the training data, defaults to 1.
 #' @param save_loss is to save the loss function during training. This will be appended to previous loss vector.
 #' @param ... additional args
 #' @return ftrlprox model object
@@ -19,7 +19,7 @@
 #' @importFrom methods as
 #' @export
 ##------------------------------------------------------------------------------
-update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...) {
+update.ftrlprox <- function(object, newX, newY, epochs=1, save_loss=F, ...) {
 
   if (!is.factor(newY))
     stop("Dependent variable must be a factor")
@@ -40,7 +40,7 @@ update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...)
     newX <- as(newX,"dgCMatrix")
   }
 
-  J = if (save_loss) numeric(nrow(newX)*num_epochs) else numeric(0)
+  J = if (save_loss) numeric(nrow(newX)) else numeric(0)
 
   out <- if(is_sparse) {
     .C("splognet_ftrlprox",
@@ -54,7 +54,6 @@ update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...)
        z=object$z,
       nn=object$nn,
       J=J,
-      num_epochs=as.integer(num_epochs),
       a=as.double(object$a),
       b=as.double(object$b),
       lambda1=as.double(object$alpha*object$lambda),
@@ -70,7 +69,6 @@ update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...)
       z=object$z,
       nn=object$nn,
       J=J,
-      num_epochs=as.integer(num_epochs),
       a=as.double(object$a),
       b=as.double(object$b),
       lambda1=as.double(object$alpha*object$lambda),
@@ -87,7 +85,6 @@ update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...)
   out$m <- NULL
   out$n <- NULL
   out$save_loss <- NULL
-  out$num_epochs <- NULL
 
   if (is_sparse) {
     out$ix <- NULL
@@ -105,5 +102,13 @@ update.ftrlprox <- function(object, newX, newY, num_epochs=1, save_loss=F, ...)
   out$alpha <- object$alpha
 
   class(out) <- "ftrlprox"
+
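+  # Same scheme as in ftrlprox.default: each remaining epoch is one more
+  # shuffled pass, expressed as a recursive call back into update().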
+  while (epochs > 1) {
+    idx <- sample(1:nrow(newX))
+    out <- update(out, newX[idx, ], newY[idx], save_loss=save_loss)
+    epochs <- epochs - 1
+  }
   out
 }
diff --git a/man/ftrlprox.default.Rd b/man/ftrlprox.default.Rd
index e385820..719e11f 100644
--- a/man/ftrlprox.default.Rd
+++ b/man/ftrlprox.default.Rd
@@ -4,7 +4,7 @@
 \alias{ftrlprox.default}
 \title{FTRL Proximal for matrix class}
 \usage{
-\method{ftrlprox}{default}(x, y, lambda, alpha, a, b = 1, num_epochs = 1,
+\method{ftrlprox}{default}(x, y, lambda, alpha, a, b = 1, epochs = 1,
   save_loss = F, ...)
 }
 \arguments{
@@ -20,7 +20,7 @@
 \item{b}{learning rate parameter controlling decay, defaults to 1.}
 
-\item{num_epochs}{number of times we should traverse over the traiing set, defaults to 1.}
+\item{epochs}{number of times we should traverse over the training set, defaults to 1.}
 
 \item{save_loss}{is to save the loss function during training.}
diff --git a/man/ftrlprox.formula.Rd b/man/ftrlprox.formula.Rd
index 7e49e80..6dcb60a 100644
--- a/man/ftrlprox.formula.Rd
+++ b/man/ftrlprox.formula.Rd
@@ -5,7 +5,7 @@
 \title{FTRL Proximal formula}
 \usage{
 \method{ftrlprox}{formula}(formula, data, lambda, alpha, a, b = 1,
-  num_epochs = 1, save_loss = F, ...)
+  epochs = 1, save_loss = F, ...)
 }
 \arguments{
 \item{formula}{modeling formula}
@@ -20,7 +20,7 @@
 \item{b}{learning rate parameter controlling decay, defaults to 1.}
 
-\item{num_epochs}{number of times we should traverse over the traiing set, defaults to 1.}
+\item{epochs}{number of times we should traverse over the training set, defaults to 1.}
 
 \item{save_loss}{is to save the loss function during training.}
diff --git a/man/update.ftrlprox.Rd b/man/update.ftrlprox.Rd
index e942025..781cbd0 100644
--- a/man/update.ftrlprox.Rd
+++ b/man/update.ftrlprox.Rd
@@ -4,7 +4,7 @@
 \alias{update.ftrlprox}
 \title{Update FTRL Proximal model}
 \usage{
-\method{update}{ftrlprox}(object, newX, newY, num_epochs = 1, save_loss = F,
+\method{update}{ftrlprox}(object, newX, newY, epochs = 1, save_loss = F,
   ...)
 }
 \arguments{
@@ -14,7 +14,7 @@
 \item{newY}{new observations}
 
-\item{num_epochs}{number of times we should traverse over the training data, defaults to 1.}
+\item{epochs}{number of times we should traverse over the training data, defaults to 1.}
 
 \item{save_loss}{is to save the loss function during training. This will be appended to previous loss vector.}
diff --git a/src/lognet.c b/src/lognet.c
index cd317c5..c412d65 100644
--- a/src/lognet.c
+++ b/src/lognet.c
@@ -51,13 +51,12 @@ void lognet_predict(double *X, double *theta, double *yhat, int *m, int *n)
  */
 void lognet_ftrlprox(double *X, double *theta, double *y, int *m,
                      int *n, double *z, double *nn, double *J,
-                     unsigned int *num_epochs, double *alpha, double *bnn,
+                     double *alpha, double *bnn,
                      double *lambda1, double *lambda2,
                      unsigned int *save_loss)
 {
   if (DEBUG) {
-    printf("num_epochs: %u\n", *num_epochs);
-    printf("num_itr: %u\n", ((*m)*(*num_epochs)));
+    printf("num_itr: %u\n", (*m));
     print_matrix(X, (*m), (*n));
   }
@@ -66,7 +65,7 @@ void lognet_ftrlprox(double *X, double *theta, double *y, int *m,
   double *sig = malloc((*n)*sizeof(double));
   double *x = malloc((*n)*sizeof(double));
 
-  for (int t = 0; t < ((*m)*(*num_epochs)); t++) {
+  for (int t = 0; t < (*m); t++) {
     node_t *l1 = malloc(sizeof(node_t));
     l1->next = NULL;
     node_t *li = l1;
diff --git a/src/splognet.c b/src/splognet.c
index 72dcdae..f36d642 100644
--- a/src/splognet.c
+++ b/src/splognet.c
@@ -32,14 +32,13 @@ void splognet_predict(double *X, double *theta, double *yhat, int *m, int *n)
  */
 void splognet_ftrlprox(double *X, int *ix, int* jx, double *theta, double *y,
                        int *m, int *n, double *z, double *nn,
-                       double *J, unsigned int *num_epochs, double *alpha,
+                       double *J, double *alpha,
                        double *bnn, double *lambda1, double *lambda2,
                        unsigned int *save_loss)
 {
   if (DEBUG) {
-    printf("num_epochs: %u\n", *num_epochs);
-    printf("num_itr: %u\n", ((*m)*(*num_epochs)));
+    printf("num_itr: %u\n", (*m));
     print_matrix(X, (*m), (*n));
   }
@@ -48,7 +47,7 @@ void splognet_ftrlprox(double *X, int *ix, int* jx, double *theta, double *y,
   double *sig = malloc((*n)*sizeof(double));
   double *x = malloc((*n)*sizeof(double));
 
-  for (int t = 0; t < ((*m)*(*num_epochs)); t++) {
+  for (int t = 0; t < (*m); t++) {
     node_t *l1 = malloc(sizeof(node_t));
     l1->next = NULL;
     node_t *li = l1;
diff --git a/tests/testthat/test_ftrlprox_matrix.r b/tests/testthat/test_ftrlprox_matrix.r
index ced82b0..7c472f6 100644
--- a/tests/testthat/test_ftrlprox_matrix.r
+++ b/tests/testthat/test_ftrlprox_matrix.r
@@ -44,9 +44,8 @@ test_that("Saving loss", {
 })
 
 test_that("Saving loss many epochs", {
-  mdl <- ftrlprox(X, dat$y, a=0.3, b=1,
-                  lambda=1, alpha=1,
-                  save_loss=TRUE, num_epochs=10)
+  mdl <- ftrlprox(X, dat$y, a=0.3, b=1, lambda=1, alpha=1,
+                  save_loss=TRUE, epochs=10)
 
   expect_equal(length(mdl$J), 10*nrow(X))
   expect_true(all(mdl$J != 0.0))
diff --git a/tests/testthat/test_update_ftrlprox.r b/tests/testthat/test_update_ftrlprox.r
index 88c5c1b..14e3797 100644
--- a/tests/testthat/test_update_ftrlprox.r
+++ b/tests/testthat/test_update_ftrlprox.r
@@ -47,9 +47,9 @@ test_that("Saving loss", {
 
 test_that("Saving loss many epochs", {
   mdl <- ftrlprox(X[1:50, ], dat$classes[1:50], a=0.3,
-                  lambda=0, alpha=0, save_loss=TRUE, num_epochs=10)
+                  lambda=0, alpha=0, save_loss=TRUE, epochs=10)
   mdl <- update(mdl, X[51:100, ], dat$classes[51:100],
-                save_loss=TRUE, num_epochs=10)
+                save_loss=TRUE, epochs=10)
 
   expect_equal(length(mdl$J), 10*nrow(X))
   expect_true(all(mdl$J != 0.0))
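
Usage sketch (not part of the patch): since the C routines now make exactly
one pass over the data, the R-level epoch loop should make the two snippets
below equivalent up to the random shuffle applied between passes. `X` and
`dat$y` stand for a design matrix and a two-class factor, set up as in the
tests above.

    # Three passes in one call: the first over the data as given, the
    # remaining two over reshuffled copies via update().
    mdl <- ftrlprox(X, dat$y, a = 0.3, b = 1, lambda = 1, alpha = 1, epochs = 3)

    # The same training written out by hand with explicit update() calls.
    mdl2 <- ftrlprox(X, dat$y, a = 0.3, b = 1, lambda = 1, alpha = 1)
    for (e in 1:2) {
      idx <- sample(1:nrow(X))
      mdl2 <- update(mdl2, X[idx, ], dat$y[idx])
    }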