diff --git a/src/lib.c b/src/lib.c index 974503e..0e3f0b9 100644 --- a/src/lib.c +++ b/src/lib.c @@ -91,6 +91,7 @@ int read_network_order(FILE *file) printf("error: failed to read file\n"); exit(1); }; + return ((num >> 24) & 0xff) | ((num << 8) & 0xff0000) | ((num >> 8) & 0xff00) | @@ -108,11 +109,8 @@ double timestamp() typedef struct Network { - double **neurons; double **weights; double **biases; - double **weights_grad; - double **biases_grad; int *dims; int ndim; } Network; @@ -120,21 +118,15 @@ typedef struct Network Network network_create(int ndim, int *dims) { Network network = { - .neurons = malloc(ndim * sizeof(double *)), .weights = malloc(ndim * sizeof(double *)), .biases = malloc(ndim * sizeof(double *)), - .weights_grad = malloc(ndim * sizeof(double *)), - .biases_grad = malloc(ndim * sizeof(double *)), .dims = malloc(ndim * sizeof(int)), - ndim = ndim, + .ndim = ndim, }; for (int i = 1; i < ndim; i++) { - network.neurons[i] = random_array(dims[i]); network.weights[i] = random_array(dims[i] * dims[i - 1]); network.biases[i] = random_array(dims[i]); - network.weights_grad[i] = malloc(dims[i] * dims[i - 1] * sizeof(double)); - network.biases_grad[i] = malloc(dims[i] * sizeof(double)); } for (int i = 0; i < ndim; i++) { @@ -147,40 +139,31 @@ void network_destroy(Network network) { for (int i = 1; i < network.ndim; i++) { - free(network.neurons[i]); free(network.weights[i]); free(network.biases[i]); - free(network.weights_grad[i]); - free(network.biases_grad[i]); } - free(network.neurons); free(network.weights); free(network.biases); - free(network.weights_grad); - free(network.biases_grad); free(network.dims); } // MACHINE LEARNING -double compute_loss(Network network, double *label) +double compute_loss(int size, double *label, double *a) { double loss = 0; - for (int i = 0; i < network.dims[network.ndim - 1]; i++) + for (int i = 0; i < size; i++) { - double tmp = network.neurons[network.ndim - 1][i] - label[i]; + double tmp = a[i] - label[i]; loss += 
tmp * tmp; } return loss; } -void forward(Network network, double *inputs) +void forward(Network network, double **a) { - network.neurons[0] = inputs; - int ndim = network.ndim; int *dims = network.dims; - double **a = network.neurons; double **w = network.weights; double **b = network.biases; @@ -199,22 +182,19 @@ void forward(Network network, double *inputs) } } -void backward(Network network, double *label) +void backward(Network network, double *label, double **a, double **w_grads, double **b_grads) { int ndim = network.ndim; int *dims = network.dims; - double **a = network.neurons; double **w = network.weights; - double **w_grad = network.weights_grad; - double **b_grad = network.biases_grad; int l = ndim - 1; for (int i = 0; i < dims[l]; i++) { - b_grad[l][i] = 2 * (a[l][i] - label[i]) * a[l][i] * (1 - a[l][i]); + b_grads[l][i] = 2 * (a[l][i] - label[i]) * a[l][i] * (1 - a[l][i]); for (int j = 0; j < dims[l - 1]; j++) { - w_grad[l][i * dims[l - 1] + j] = a[l - 1][j] * b_grad[l][i]; + w_grads[l][i * dims[l - 1] + j] = a[l - 1][j] * b_grads[l][i]; } } @@ -222,16 +202,16 @@ void backward(Network network, double *label) { for (int i = 0; i < dims[l]; i++) { - b_grad[l][i] = 0; + b_grads[l][i] = 0; for (int j = 0; j < dims[l + 1]; j++) { - b_grad[l][i] += w[l + 1][j * dims[l] + i] * b_grad[l + 1][j]; + b_grads[l][i] += w[l + 1][j * dims[l] + i] * b_grads[l + 1][j]; } - b_grad[l][i] *= a[l][i] * (1 - a[l][i]); + b_grads[l][i] *= a[l][i] * (1 - a[l][i]); for (int j = 0; j < network.dims[l - 1]; j++) { - w_grad[l][i * dims[l - 1] + j] = a[l - 1][j] * b_grad[l][i]; + w_grads[l][i * dims[l - 1] + j] = a[l - 1][j] * b_grads[l][i]; } } } @@ -242,12 +222,23 @@ double update_mini_batch(Network network, Image *images, int batch_size, double int ndim = network.ndim; int *dims = network.dims; - // allocate arrays to accumulate gradients + // allocate variable arrays + double **neurons = malloc(ndim * sizeof(double *)); + + double **weights_grads = malloc(ndim * sizeof(double 
*)); + double **biases_grads = malloc(ndim * sizeof(double *)); + + double **weights_grad = malloc(ndim * sizeof(double *)); double **biases_grad = malloc(ndim * sizeof(double *)); + neurons[0] = NULL; /* re-pointed at each batch image below; a calloc here would leak */ for (int l = 1; l < ndim; l++) { + neurons[l] = calloc(sizeof(double), dims[l]); + + weights_grads[l] = calloc(sizeof(double), dims[l] * dims[l - 1]); + biases_grads[l] = calloc(sizeof(double), dims[l]); + weights_grad[l] = calloc(sizeof(double), dims[l] * dims[l - 1]); biases_grad[l] = calloc(sizeof(double), dims[l]); } @@ -256,20 +247,21 @@ double update_mini_batch(Network network, Image *images, int batch_size, double double loss = 0; for (int b = 0; b < batch_size; b++) { - forward(network, images[b].data); - loss += compute_loss(network, images[b].label); + neurons[0] = images[b].data; + forward(network, neurons); + loss += compute_loss(dims[ndim - 1], images[b].label, neurons[ndim - 1]); - backward(network, images[b].label); + backward(network, images[b].label, neurons, weights_grads, biases_grads); for (int l = 1; l < ndim; l++) { for (int i = 0; i < dims[l]; i++) { - biases_grad[l][i] += network.biases_grad[l][i]; + biases_grad[l][i] += biases_grads[l][i]; for (int j = 0; j < dims[l - 1]; j++) { int idx = i * dims[l - 1] + j; - weights_grad[l][idx] += network.weights_grad[l][idx]; + weights_grad[l][idx] += weights_grads[l][idx]; } } } @@ -290,12 +282,19 @@ double update_mini_batch(Network network, Image *images, int batch_size, double } } - // free gradient arrays + // free variable arrays for (int i = 1; i < network.ndim; i++) { + free(neurons[i]); + free(weights_grads[i]); + free(biases_grads[i]); free(weights_grad[i]); free(biases_grad[i]); } + + free(neurons); + free(weights_grads); + free(biases_grads); free(weights_grad); free(biases_grad); diff --git a/src/main.c b/src/main.c index 83d5fa4..3b88cab 100644 --- a/src/main.c +++ b/src/main.c @@ -27,11 +27,18 @@ int train(int batch_size, int ndim, int *dims_hidden, int epochs, 
double learnin epoch(network, dataset, batch_size, learning_rate); } + double **neurons = malloc(sizeof(double *) * ndim); + for (int l = 1; l < ndim; l++) /* slot 0 aliases the current image; callocing it would leak */ + { + neurons[l] = calloc(sizeof(double), dims[l]); + } + int predicted_correctly = 0; for (int i = 0; i < dataset.size; i++) { - forward(network, dataset.images[i].data); - predicted_correctly += arg_max(network.neurons[network.ndim - 1]) == arg_max(dataset.images[i].label); + neurons[0] = dataset.images[i].data; + forward(network, neurons); + predicted_correctly += arg_max(neurons[ndim - 1]) == arg_max(dataset.images[i].label); } printf("predicted: %d, accurarcy: %f\n", predicted_correctly, ((double)predicted_correctly) / dataset.size); @@ -61,12 +68,28 @@ int bench() }; Network network = network_create(ndim, dims); printf("created network of size %d", dims[0]); + for (int i = 1; i < ndim; i++) { printf("x%d", dims[i]); } printf("\n"); + // allocate variable arrays + double **neurons = malloc(ndim * sizeof(double *)); + + double **weights_grads = malloc(ndim * sizeof(double *)); + double **biases_grads = malloc(ndim * sizeof(double *)); + + for (int l = 1; l < ndim; l++) + { + neurons[l] = calloc(sizeof(double), dims[l]); + + weights_grads[l] = calloc(sizeof(double), dims[l] * dims[l - 1]); + biases_grads[l] = calloc(sizeof(double), dims[l]); + } + neurons[0] = dataset.images[0].data; + int n_passes = 100; { @@ -74,7 +97,7 @@ int bench() double start = timestamp(); for (int i = 0; i < n_passes; i++) { - forward(network, dataset.images[i].data); + forward(network, neurons); } double end = timestamp(); printf("took: %.3f seconds (%d passes)\n", end - start, n_passes); @@ -85,12 +108,24 @@ int bench() double start = timestamp(); for (int i = 0; i < n_passes; i++) { - backward(network, dataset.images[i].label); + backward(network, dataset.images[i].label, neurons, weights_grads, biases_grads); } double end = timestamp(); printf("took: %.3f seconds (%d passes)\n", end - start, n_passes); } + // free variable arrays 
+ for (int i = 1; i < network.ndim; i++) + { + free(neurons[i]); + free(weights_grads[i]); + free(biases_grads[i]); + } + + free(neurons); + free(weights_grads); + free(biases_grads); + return 0; } @@ -118,11 +153,18 @@ int run(char *model_path) Dataset dataset = load_mnist_dataset("mnist/t10k-labels-idx1-ubyte", "mnist/t10k-images-idx3-ubyte"); printf("loaded dataset with %d images\n", dataset.size); + double **neurons = malloc(sizeof(double *) * network.ndim); + for (int l = 1; l < network.ndim; l++) /* slot 0 aliases the current image; callocing it would leak */ + { + neurons[l] = calloc(sizeof(double), network.dims[l]); + } + int predicted_correctly = 0; for (int i = 0; i < dataset.size; i++) { - forward(network, dataset.images[i].data); - predicted_correctly += arg_max(network.neurons[network.ndim - 1]) == arg_max(dataset.images[i].label); + neurons[0] = dataset.images[i].data; + forward(network, neurons); + predicted_correctly += arg_max(neurons[network.ndim - 1]) == arg_max(dataset.images[i].label); } printf("predicted: %d, accurarcy: %f\n", predicted_correctly, ((double)predicted_correctly) / dataset.size); diff --git a/src/test_backprop.c b/src/test_backprop.c index eaeb184..03c67fb 100644 --- a/src/test_backprop.c +++ b/src/test_backprop.c @@ -5,6 +5,21 @@ void test_back_propagation() int dims[] = {2, 3, 4, 3, 2}; Network network = network_create(5, dims); + // allocate variable arrays + double **neurons = malloc(network.ndim * sizeof(double *)); + + double **weights_grads = malloc(network.ndim * sizeof(double *)); + double **biases_grads = malloc(network.ndim * sizeof(double *)); + + neurons[0] = calloc(sizeof(double), dims[0]); + for (int l = 1; l < network.ndim; l++) + { + neurons[l] = calloc(sizeof(double), dims[l]); + + weights_grads[l] = calloc(sizeof(double), dims[l] * dims[l - 1]); + biases_grads[l] = calloc(sizeof(double), dims[l]); + } + // fill network network.weights[1][0] = 0.30742281675338745; network.weights[1][1] = 0.6340786814689636; @@ -114,29 +129,44 @@ void test_back_propagation() nabla_b4[1] = 
0.06983662396669388; // fill inputs and label - double *inputs = malloc(2 * sizeof(double)); - inputs[0] = 0.49625658988952637; - inputs[1] = 0.7682217955589294; + // inputs are written directly into neurons[0] + + neurons[0][0] = 0.49625658988952637; + neurons[0][1] = 0.7682217955589294; + + double *label = malloc(2 * sizeof(double)); label[0] = 0.08847743272781372; label[1] = 0.13203048706054688; // run backprop - forward(network, inputs); - double loss = compute_loss(network, label); - backward(network, label); + + forward(network, neurons); + double loss = compute_loss(network.dims[network.ndim - 1], label, neurons[network.ndim - 1]); + backward(network, label, neurons, weights_grads, biases_grads); // compare loss assert_scalar("loss", 1.131441593170166, loss); // compare gradients - assert_array("nabla_w1", 6, nabla_w1, network.weights_grad[1]); - assert_array("nabla_w2", 12, nabla_w2, network.weights_grad[2]); - assert_array("nabla_w3", 12, nabla_w3, network.weights_grad[3]); - assert_array("nabla_w4", 6, nabla_w4, network.weights_grad[4]); - assert_array("nabla_b1", 3, nabla_b1, network.biases_grad[1]); - assert_array("nabla_b2", 4, nabla_b2, network.biases_grad[2]); - assert_array("nabla_b3", 3, nabla_b3, network.biases_grad[3]); - assert_array("nabla_b4", 2, nabla_b4, network.biases_grad[4]); + assert_array("nabla_w1", 6, nabla_w1, weights_grads[1]); + assert_array("nabla_w2", 12, nabla_w2, weights_grads[2]); + assert_array("nabla_w3", 12, nabla_w3, weights_grads[3]); + assert_array("nabla_w4", 6, nabla_w4, weights_grads[4]); + assert_array("nabla_b1", 3, nabla_b1, biases_grads[1]); + assert_array("nabla_b2", 4, nabla_b2, biases_grads[2]); + assert_array("nabla_b3", 3, nabla_b3, biases_grads[3]); + assert_array("nabla_b4", 2, nabla_b4, biases_grads[4]); + + for (int i = 1; i < network.ndim; i++) + { + free(neurons[i]); + free(weights_grads[i]); + free(biases_grads[i]); 
+ } + free(neurons[0]); + free(neurons); + free(weights_grads); + free(biases_grads); // free gradients free(nabla_w1); @@ -147,9 +177,10 @@ free(nabla_b2); free(nabla_b3); free(nabla_b4); + // free inputs and labels - free(inputs); free(label); + // destroy network network_destroy(network); }