Error when training: "closure_loss" is NoneType #6806
-
Hi all, I am trying to train my LightningModule but I keep getting the error in the title. I think it might be something to do with how I format my LightningModule, so here is what it looks like:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
# imports inferred from the snippet; `accuracy` assumed to come from
# Lightning's functional metrics
from pytorch_lightning.metrics.functional import accuracy


class HPAModelV1(pl.LightningModule):
    def __init__(self):
        super().__init__()
        # self.lossfunc = F.cross_entropy
        self.lossfunc = F.nll_loss
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=3, padding=7)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=5, stride=1, padding=1)
        self.dense = nn.Linear(16, 19)

    def forward(self, x):  # input size is (256, 3, 256, 256)
        x = x.float()
        out = self.conv1(x)
        out = F.relu(out)
        out = F.max_pool2d(out, 3)  # output is (bs, 16, 30, 30)
        out = self.conv2(out)
        out = F.relu(out)
        out = F.max_pool2d(out, 3)  # output is (bs, 16, 10, 10)
        out = self.conv3(out)
        out = F.relu(out)
        out = F.max_pool2d(out, 8)  # output is (bs, 16, 1, 1)
        # dense layer
        out = out.reshape(out.size()[0], 16)
        out = self.dense(out)
        return out

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer

    def training_step(self, batch, batchidx):
        # set labels and data
        x = batch[0]
        y = batch[1]
        # forward pass
        preds = self(x)
        probs = F.softmax(preds, dim=1)
        # compute the loss function
        J = self.lossfunc(probs, y)
        # compute accuracy
        acc = accuracy(probs, y)
        # log for Weights & Biases
        self.log('training loss (step)', J)
        self.log('training accuracy (step)', acc)
        self.log('mean training loss (epoch)', J, on_step=False, on_epoch=True)
        self.log('mean training accuracy (epoch)', acc, on_step=False, on_epoch=True)
        # add information to the progress bar
        pbar = {'train_acc': acc, 'train_loss': J}
        return J, acc

    def validation_step(self, valbatch, valbatchidx):
        # use the same training step on the val set
        valJ, valAcc = self.training_step(valbatch, valbatchidx)
        # log for W&B
        self.log('validation loss (step)', valJ)
        self.log('validation accuracy (step)', valAcc)
        self.log('mean validation loss (epoch)', valJ, on_step=False, on_epoch=True)
        self.log('mean validation accuracy (epoch)', valAcc, on_step=False, on_epoch=True)
        return valJ, valAcc

    def validation_epoch_end(self, valStepOutputs):
        pass
```

And if it may help in diagnosing the cause of the issue, here is the stack trace and output of the Trainer:
Thank you, and sorry for all the text!
-
Hi @adamDhalla,

`training_step` needs to return one of:

- `Tensor` - The loss tensor
- `dict` - A dictionary. Can include any keys, but must include the key `'loss'`
- `None` - Training will skip to the next batch

https://pytorch-lightning.readthedocs.io/en/latest/common/lightning_module.html#training-step
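
Concretely, your `training_step` returns the tuple `(J, acc)`, which is none of the above, so Lightning cannot extract the loss and `closure_loss` ends up as `None`. A minimal sketch of one way to fix it, reusing the names from your snippet (untested against your data):

```python
def training_step(self, batch, batch_idx):
    x, y = batch
    preds = self(x)
    probs = F.softmax(preds, dim=1)
    J = self.lossfunc(probs, y)
    acc = accuracy(probs, y)
    self.log('training loss (step)', J)
    self.log('training accuracy (step)', acc)
    # Return a dict so extra values can ride along with the loss;
    # returning just `J` would also satisfy the contract.
    return {'loss': J, 'acc': acc}

def validation_step(self, batch, batch_idx):
    # Reusing training_step now yields a dict, so index into it
    # instead of tuple-unpacking.
    out = self.training_step(batch, batch_idx)
    self.log('mean validation loss (epoch)', out['loss'], on_step=False, on_epoch=True)
    self.log('mean validation accuracy (epoch)', out['acc'], on_step=False, on_epoch=True)
```

Note that reusing `training_step` inside `validation_step` also runs the training-metric logging during validation, so you may prefer a shared helper and two separate step methods.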