@@ -93,7 +93,9 @@ class CoolSystem(pl.LightningModule):
93
93
# REQUIRED
94
94
x, y = batch
95
95
y_hat = self.forward(x)
96
- return {'loss': F.cross_entropy(y_hat, y)}
96
+ loss = F.cross_entropy(y_hat, y)
97
+ tensorboard_logs = {'train_loss': loss}
98
+ return {'loss': loss, 'log': tensorboard_logs}
97
99
98
100
def validation_step(self, batch, batch_nb):
99
101
# OPTIONAL
@@ -104,7 +106,8 @@ class CoolSystem(pl.LightningModule):
104
106
def validation_end(self, outputs):
105
107
# OPTIONAL
106
108
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
107
- return {'avg_val_loss': avg_loss}
109
+ tensorboard_logs = {'val_loss': avg_loss}
110
+ return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}
108
111
109
112
def configure_optimizers(self):
110
113
# REQUIRED
@@ -138,30 +141,27 @@ trainer = Trainer()
138
141
trainer.fit(model)
139
142
```
140
143
141
- Or with tensorboard logger and some options turned on such as multi-gpu, etc...
142
- ``` python
143
- from test_tube import Experiment
144
-
145
- # PyTorch summarywriter with a few bells and whistles
146
- exp = Experiment(save_dir=os.getcwd())
144
+ Trainer sets up a tensorboard logger, early stopping and checkpointing by default (you can modify all of them or
145
+ use something other than tensorboard).
147
146
147
+ Here are more advanced examples
148
+ ``` python
148
149
# train on cpu using only 10% of the data (for demo purposes)
149
- # pass in experiment for automatic tensorboard logging.
150
- trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1)
150
+ trainer = Trainer(max_nb_epochs=1, train_percent_check=0.1)
151
151
152
152
# train on 4 gpus (lightning chooses GPUs for you)
153
- # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=4)
153
+ # trainer = Trainer(max_nb_epochs=1, gpus=4)
154
154
155
155
# train on 4 gpus (you choose GPUs)
156
- # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=[0, 1, 3, 7])
156
+ # trainer = Trainer(max_nb_epochs=1, gpus=[0, 1, 3, 7])
157
157
158
158
# train on 32 gpus across 4 nodes (make sure to submit appropriate SLURM job)
159
- # trainer = Trainer(experiment=exp, max_nb_epochs=1, gpus=8, nb_gpu_nodes=4)
159
+ # trainer = Trainer(max_nb_epochs=1, gpus=8, nb_gpu_nodes=4)
160
160
161
161
# train (1 epoch only here for demo)
162
162
trainer.fit(model)
163
163
164
- # view tensorflow logs
164
+ # view tensorboard logs
165
165
print('View tensorboard logs by running\ntensorboard --logdir %s' % os.getcwd())
166
166
print('and going to http://localhost:6006 on your browser')
167
167
```
@@ -176,7 +176,7 @@ trainer.test()
176
176
Everything in gray!
177
177
You define the blue parts using the LightningModule interface:
178
178
179
- ![Ouverview](./docs/source/_static/overview_flat.jpg)
179
+ ![Overview](./docs/source/_static/overview_flat.jpg)
180
180
181
181
``` python
182
182
# what to do in the training loop
@@ -251,12 +251,13 @@ def validation_end(self, outputs):
251
251
252
252
val_loss_mean /= len(outputs)
253
253
val_acc_mean /= len(outputs)
254
- tqdm_dict = {'val_loss': val_loss_mean.item(), 'val_acc': val_acc_mean.item()}
255
- return tqdm_dict
254
+ logs = {'val_loss': val_loss_mean.item(), 'val_acc': val_acc_mean.item()}
255
+ result = {'log': logs}
256
+ return result
256
257
```
257
258
258
259
## Tensorboard
259
- Lightning is fully integrated with tensorboard.
260
+ Lightning is fully integrated with tensorboard, MLFlow and supports any logging module.
260
261
261
262
![tensorboard-support](./docs/source/_static/tf_loss.png)
262
263
0 commit comments