diff --git a/README.md b/README.md
index afcdb0a..a162386 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,18 @@ The exact speedup you get depends on how much error you can tolerate--higher spe
 ## Installation
 To run this script, you will need [PyTorch](http://pytorch.org) and a CUDA-capable GPU. If you wish to run it on CPU, just remove all the .cuda() calls.
 
+Install the ```path``` module with pip:
+```sh
+pip install path.py
+```
+
 ## Running
+First, create a folder named ```logs```:
+
+```sh
+mkdir logs
+```
+
 To run with default parameters, simply call
 
 ```sh
diff --git a/densenet.py b/densenet.py
index cd90310..e5d48ab 100644
--- a/densenet.py
+++ b/densenet.py
@@ -240,5 +240,5 @@ def forward(self, x):
         out = self.trans2(self.dense2(out))
         out = self.dense3(out)
         out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8))
-        out = F.log_softmax(self.fc(out))
+        out = F.log_softmax(self.fc(out), dim=-1)
         return out
\ No newline at end of file
diff --git a/train.py b/train.py
index 904f4e9..fbea858 100644
--- a/train.py
+++ b/train.py
@@ -27,7 +27,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.autograd import Variable as V
 import torchvision.datasets as dset
 import torchvision.transforms as transforms
 from torch.utils.data import DataLoader
@@ -174,22 +173,23 @@ def train_test(depth, growth_rate, dropout, augment,
     # y: target labels
     def train_fn(x, y):
         net.optim.zero_grad()
-        output = net(V(x.cuda()))
-        loss = F.nll_loss(output, V(y.cuda()))
+        output = net(x.cuda())  # tensors are passed directly; the Variable wrapper is dropped
+        loss = F.nll_loss(output, y.cuda())  # NLL loss on the network output vs. target labels
         loss.backward()
         net.optim.step()
-        return loss.data[0]
+        return loss.item()  # scalar loss as a plain Python number (replaces loss.data[0])
 
     # Testing function, returns test loss and test error for a batch
     # x: input data
     # y: target labels
     def test_fn(x, y):
-        output = net(V(x.cuda(), volatile=True))
-        test_loss = F.nll_loss(output, V(y.cuda(), volatile=True)).data[0]
+        with torch.no_grad():  # replaces volatile=True: no autograd graph is built for evaluation
+            output = net(x.cuda())
+            test_loss = F.nll_loss(output, y.cuda()).item()  # plain Python number
 
-        # Get the index of the max log-probability as the prediction.
-        pred = output.data.max(1)[1].cpu()
-        test_error = pred.ne(y).sum()
+            # Get the index of the max log-probability as the prediction.
+            pred = output.max(1)[1].cpu()  # .data is unnecessary inside no_grad
+            test_error = pred.ne(y).sum().item()  # count of mismatches as a Python number, like test_loss
 
         return test_loss, test_error
 
diff --git a/utils.py b/utils.py
index ee7307c..019d0ec 100644
--- a/utils.py
+++ b/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from __future__ import print_function
 import sys