|
1 | | -# -*- coding: utf-8 -*- |
2 | | -""" |
3 | | -nn package |
4 | | -========== |
| 1 | +nn Package |
| 2 | +=============== |
5 | 3 |
|
6 | | -We’ve redesigned the nn package, so that it’s fully integrated with |
7 | | -autograd. Let's review the changes. |
| 4 | +This tutorial is out of date. You'll be redirected to the new tutorial in 3 seconds: https://pytorch.org/tutorials/beginner/nn_tutorial.html |
8 | 5 |
|
9 | | -**Replace containers with autograd:** |
| 6 | +.. raw:: html |
10 | 7 |
|
11 | | - You no longer have to use Containers like ``ConcatTable``, or modules like |
12 | | - ``CAddTable``, or use and debug with nngraph. We will seamlessly use |
13 | | - autograd to define our neural networks. For example, |
14 | | - |
15 | | - * ``output = nn.CAddTable():forward({input1, input2})`` simply becomes |
16 | | - ``output = input1 + input2`` |
17 | | - * ``output = nn.MulConstant(0.5):forward(input)`` simply becomes |
18 | | - ``output = input * 0.5`` |
19 | | - |
20 | | -**State is no longer held in the module, but in the network graph:** |
21 | | - |
22 | | -   Using recurrent networks should be simpler for this reason. If you want to
23 | | -   create a recurrent network, simply reuse the same Linear layer multiple times,
24 | | -   without having to think about sharing weights (a sketch follows the figure).
25 | | - |
26 | | - .. figure:: /_static/img/torch-nn-vs-pytorch-nn.png |
27 | | - :alt: torch-nn-vs-pytorch-nn |
28 | | - |
29 | | - torch-nn-vs-pytorch-nn |
30 | | - |
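For a concrete illustration of this point, here is a minimal sketch (hypothetical names, not part of the original file; Example 2 below builds a full recurrence the same way): applying one ``nn.Linear`` twice uses a single set of parameters, and ``backward()`` accumulates gradients from both applications into it.

import torch
import torch.nn as nn

shared = nn.Linear(20, 20)          # one layer, hence one weight matrix
x = torch.randn(4, 20)

h = torch.tanh(shared(x))           # first application
h = torch.tanh(shared(h))           # second application, same parameters
h.sum().backward()

print(shared.weight.grad.size())    # gradients from both uses, summed into one tensor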
31 | | -**Simplified debugging:** |
32 | | - |
33 | | -   Debugging is intuitive using Python’s pdb debugger, and **the debugger
34 | | -   and stack traces stop exactly where an error occurred.** What you see
35 | | - is what you get. |
36 | | - |
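As a minimal sketch of what this looks like in practice (hypothetical module, shown only for illustration), you can drop into pdb inside ``forward`` and inspect intermediate tensors right where they are computed:

import pdb
import torch
import torch.nn as nn

class Debuggable(nn.Module):
    def __init__(self):
        super(Debuggable, self).__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        h = self.fc(x)
        pdb.set_trace()   # execution pauses here; x and h are in scope
        return h

# Debuggable()(torch.randn(3, 4))  # uncomment to try it interactively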
37 | | -Example 1: ConvNet |
38 | | ------------------- |
39 | | - |
40 | | -Let’s see how to create a small ConvNet. |
41 | | - |
42 | | -All of your networks are derived from the base class ``nn.Module``: |
43 | | - |
44 | | -- In the constructor, you declare all the layers you want to use. |
45 | | -- In the forward function, you define how your model is going to be |
46 | | - run, from input to output |
47 | | -""" |
48 | | - |
49 | | -import torch |
50 | | -import torch.nn as nn |
51 | | -import torch.nn.functional as F |
52 | | - |
53 | | - |
54 | | -class MNISTConvNet(nn.Module): |
55 | | - |
56 | | - def __init__(self): |
57 | | - # this is the place where you instantiate all your modules |
58 | | - # you can later access them using the same names you've given them in |
59 | | - # here |
60 | | - super(MNISTConvNet, self).__init__() |
61 | | - self.conv1 = nn.Conv2d(1, 10, 5) |
62 | | - self.pool1 = nn.MaxPool2d(2, 2) |
63 | | - self.conv2 = nn.Conv2d(10, 20, 5) |
64 | | - self.pool2 = nn.MaxPool2d(2, 2) |
65 | | - self.fc1 = nn.Linear(320, 50) |
66 | | - self.fc2 = nn.Linear(50, 10) |
67 | | - |
68 | | - # it's the forward function that defines the network structure |
69 | | - # we're accepting only a single input in here, but if you want, |
70 | | - # feel free to use more |
71 | | - def forward(self, input): |
72 | | - x = self.pool1(F.relu(self.conv1(input))) |
73 | | - x = self.pool2(F.relu(self.conv2(x))) |
74 | | - |
75 | | - # in your model definition you can go full crazy and use arbitrary |
76 | | - # python code to define your model structure |
77 | | - # all these are perfectly legal, and will be handled correctly |
78 | | - # by autograd: |
79 | | -        # if x.gt(0).sum() > x.numel() / 2:
80 | | - # ... |
81 | | - # |
82 | | - # you can even do a loop and reuse the same module inside it |
83 | | - # modules no longer hold ephemeral state, so you can use them |
84 | | - # multiple times during your forward pass |
85 | | - # while x.norm(2) < 10: |
86 | | - # x = self.conv1(x) |
87 | | - |
88 | | - x = x.view(x.size(0), -1) |
89 | | - x = F.relu(self.fc1(x)) |
90 | | - x = F.relu(self.fc2(x)) |
91 | | - return x |
92 | | - |
93 | | -############################################################### |
94 | | -# Let's use the defined ConvNet now. |
95 | | -# You create an instance of the class first. |
96 | | - |
97 | | - |
98 | | -net = MNISTConvNet() |
99 | | -print(net) |
100 | | - |
101 | | -######################################################################## |
102 | | -# .. note:: |
103 | | -# |
104 | | -# ``torch.nn`` only supports mini-batches. The entire ``torch.nn``
105 | | -# package only supports inputs that are a mini-batch of samples, not
106 | | -# a single sample.
107 | | -# |
108 | | -# For example, ``nn.Conv2d`` will take in a 4D Tensor of |
109 | | -# ``nSamples x nChannels x Height x Width``. |
110 | | -# |
111 | | -# If you have a single sample, just use ``input.unsqueeze(0)`` to add |
112 | | -# a fake batch dimension. |
113 | | -# |
114 | | -# Create a mini-batch containing a single sample of random data and send the |
115 | | -# sample through the ConvNet. |
116 | | - |
117 | | -input = torch.randn(1, 1, 28, 28) |
118 | | -out = net(input) |
119 | | -print(out.size()) |
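If you instead start from a single sample without a batch dimension, ``unsqueeze(0)`` adds the fake batch dimension mentioned in the note above (``single`` and ``batched`` are hypothetical names used only for illustration):

single = torch.randn(1, 28, 28)   # one sample: channels x height x width
batched = single.unsqueeze(0)     # now 1 x 1 x 28 x 28, a mini-batch of one
print(net(batched).size())        # same output shape as above: 1 x 10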
120 | | - |
121 | | -######################################################################## |
122 | | -# Define a dummy target label and compute error using a loss function. |
123 | | - |
124 | | -target = torch.tensor([3], dtype=torch.long) |
125 | | -loss_fn = nn.CrossEntropyLoss() # LogSoftmax + ClassNLL Loss |
126 | | -err = loss_fn(out, target) |
127 | | -err.backward() |
128 | | - |
129 | | -print(err) |
130 | | - |
131 | | -######################################################################## |
132 | | -# The output of the ConvNet ``out`` is a ``Tensor``. We compute the loss |
133 | | -# using it, which results in ``err``, also a ``Tensor``.
134 | | -# Calling ``.backward`` on ``err`` therefore propagates gradients all the
135 | | -# way through the ConvNet to its weights.
136 | | -# |
137 | | -# Let's access individual layer weights and gradients: |
138 | | - |
139 | | -print(net.conv1.weight.grad.size()) |
140 | | - |
141 | | -######################################################################## |
142 | | -print(net.conv1.weight.data.norm()) # norm of the weight |
143 | | -print(net.conv1.weight.grad.data.norm()) # norm of the gradients |
144 | | - |
145 | | -######################################################################## |
146 | | -# Forward and Backward Function Hooks |
147 | | -# ----------------------------------- |
148 | | -# |
149 | | -# We’ve inspected the weights and the gradients. But how about inspecting |
150 | | -# / modifying the output and grad\_output of a layer? |
151 | | -# |
152 | | -# We introduce **hooks** for this purpose. |
153 | | -# |
154 | | -# You can register a function on a ``Module`` or a ``Tensor``. |
155 | | -# The hook can be a forward hook or a backward hook. |
156 | | -# The forward hook will be executed when a forward call is executed. |
157 | | -# The backward hook will be executed in the backward phase. |
158 | | -# Let’s look at an example. |
159 | | -# |
160 | | -# We register a forward hook on conv2 and print some information |
161 | | - |
162 | | - |
163 | | -def printnorm(self, input, output): |
164 | | - # input is a tuple of packed inputs |
165 | | -    # output is a Tensor. output.data is the Tensor we are interested in
166 | | - print('Inside ' + self.__class__.__name__ + ' forward') |
167 | | - print('') |
168 | | - print('input: ', type(input)) |
169 | | - print('input[0]: ', type(input[0])) |
170 | | - print('output: ', type(output)) |
171 | | - print('') |
172 | | - print('input size:', input[0].size()) |
173 | | - print('output size:', output.data.size()) |
174 | | - print('output norm:', output.data.norm()) |
175 | | - |
176 | | - |
177 | | -net.conv2.register_forward_hook(printnorm) |
178 | | - |
179 | | -out = net(input) |
180 | | - |
181 | | -######################################################################## |
182 | | -# |
183 | | -# We register a backward hook on conv2 and print some information |
184 | | - |
185 | | - |
186 | | -def printgradnorm(self, grad_input, grad_output): |
187 | | - print('Inside ' + self.__class__.__name__ + ' backward') |
188 | | - print('Inside class:' + self.__class__.__name__) |
189 | | - print('') |
190 | | - print('grad_input: ', type(grad_input)) |
191 | | - print('grad_input[0]: ', type(grad_input[0])) |
192 | | - print('grad_output: ', type(grad_output)) |
193 | | - print('grad_output[0]: ', type(grad_output[0])) |
194 | | - print('') |
195 | | - print('grad_input size:', grad_input[0].size()) |
196 | | - print('grad_output size:', grad_output[0].size()) |
197 | | - print('grad_input norm:', grad_input[0].norm()) |
198 | | - |
199 | | - |
200 | | -net.conv2.register_backward_hook(printgradnorm) |
201 | | - |
202 | | -out = net(input) |
203 | | -err = loss_fn(out, target) |
204 | | -err.backward() |
205 | | - |
206 | | -######################################################################## |
207 | | -# A full and working MNIST example is located here |
208 | | -# https://github.com/pytorch/examples/tree/master/mnist |
209 | | -# |
210 | | -# Example 2: Recurrent Net |
211 | | -# ------------------------ |
212 | | -# |
213 | | -# Next, let’s look at building recurrent nets with PyTorch. |
214 | | -# |
215 | | -# Since the state of the network is held in the graph and not in the |
216 | | -# layers, you can simply create an nn.Linear and reuse it over and over |
217 | | -# again for the recurrence. |
218 | | - |
219 | | - |
220 | | -class RNN(nn.Module): |
221 | | - |
222 | | - # you can also accept arguments in your model constructor |
223 | | - def __init__(self, data_size, hidden_size, output_size): |
224 | | - super(RNN, self).__init__() |
225 | | - |
226 | | - self.hidden_size = hidden_size |
227 | | - input_size = data_size + hidden_size |
228 | | - |
229 | | - self.i2h = nn.Linear(input_size, hidden_size) |
230 | | - self.h2o = nn.Linear(hidden_size, output_size) |
231 | | - |
232 | | - def forward(self, data, last_hidden): |
233 | | - input = torch.cat((data, last_hidden), 1) |
234 | | - hidden = self.i2h(input) |
235 | | - output = self.h2o(hidden) |
236 | | - return hidden, output |
237 | | - |
238 | | - |
239 | | -rnn = RNN(50, 20, 10) |
240 | | - |
241 | | -######################################################################## |
242 | | -# |
243 | | -# A more complete language modeling example using LSTMs and the Penn Treebank
244 | | -# dataset is located
245 | | -# `here <https://github.com/pytorch/examples/tree/master/word_language_model>`_.
246 | | -# |
247 | | -# PyTorch by default has seamless cuDNN integration for ConvNets and
248 | | -# Recurrent Nets.
249 | | - |
250 | | -loss_fn = nn.MSELoss() |
251 | | - |
252 | | -batch_size = 10 |
253 | | -TIMESTEPS = 5 |
254 | | - |
255 | | -# Create some fake data |
256 | | -batch = torch.randn(batch_size, 50) |
257 | | -hidden = torch.zeros(batch_size, 20) |
258 | | -target = torch.zeros(batch_size, 10) |
259 | | - |
260 | | -loss = 0 |
261 | | -for t in range(TIMESTEPS): |
262 | | - # yes! you can reuse the same network several times, |
263 | | - # sum up the losses, and call backward! |
264 | | - hidden, output = rnn(batch, hidden) |
265 | | - loss += loss_fn(output, target) |
266 | | -loss.backward() |
| 8 | + <meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/beginner/nn_tutorial.html'" /> |