
Commit e241b37

Another round of md -> mdx
1 parent 38a9d53 commit e241b37


6 files changed: +95 -148 lines changed


.astro/types.d.ts

Lines changed: 15 additions & 15 deletions
@@ -213,20 +213,20 @@ declare module 'astro:content' {
collection: "post";
data: InferEntrySchema<"post">
} & { render(): Render[".mdx"] };
-"convnet-conv-layer.md": {
-id: "convnet-conv-layer.md";
+"convnet-conv-layer.mdx": {
+id: "convnet-conv-layer.mdx";
slug: "convnet-conv-layer";
body: string;
collection: "post";
data: InferEntrySchema<"post">
-} & { render(): Render[".md"] };
-"convnet-maxpool-layer.md": {
-id: "convnet-maxpool-layer.md";
+} & { render(): Render[".mdx"] };
+"convnet-maxpool-layer.mdx": {
+id: "convnet-maxpool-layer.mdx";
slug: "convnet-maxpool-layer";
body: string;
collection: "post";
data: InferEntrySchema<"post">
-} & { render(): Render[".md"] };
+} & { render(): Render[".mdx"] };
"coupled-gan.mdx": {
id: "coupled-gan.mdx";
slug: "coupled-gan";
@@ -276,13 +276,13 @@ declare module 'astro:content' {
collection: "post";
data: InferEntrySchema<"post">
} & { render(): Render[".mdx"] };
-"gan-tensorflow.md": {
-id: "gan-tensorflow.md";
+"gan-tensorflow.mdx": {
+id: "gan-tensorflow.mdx";
slug: "gan-tensorflow";
body: string;
collection: "post";
data: InferEntrySchema<"post">
-} & { render(): Render[".md"] };
+} & { render(): Render[".mdx"] };
"gaussian-anomaly-detection.md": {
id: "gaussian-anomaly-detection.md";
slug: "gaussian-anomaly-detection";
@@ -325,13 +325,13 @@ declare module 'astro:content' {
collection: "post";
data: InferEntrySchema<"post">
} & { render(): Render[".mdx"] };
-"jekyll-fb-share.md": {
-id: "jekyll-fb-share.md";
+"jekyll-fb-share.mdx": {
+id: "jekyll-fb-share.mdx";
slug: "jekyll-fb-share";
body: string;
collection: "post";
data: InferEntrySchema<"post">
-} & { render(): Render[".md"] };
+} & { render(): Render[".mdx"] };
"kl-mle.mdx": {
id: "kl-mle.mdx";
slug: "kl-mle";
@@ -374,13 +374,13 @@ declare module 'astro:content' {
collection: "post";
data: InferEntrySchema<"post">
} & { render(): Render[".mdx"] };
-"lstm-backprop.md": {
-id: "lstm-backprop.md";
+"lstm-backprop.mdx": {
+id: "lstm-backprop.mdx";
slug: "lstm-backprop";
body: string;
collection: "post";
data: InferEntrySchema<"post">
-} & { render(): Render[".md"] };
+} & { render(): Render[".mdx"] };
"manifold-gaussians.mdx": {
id: "manifold-gaussians.mdx";
slug: "manifold-gaussians";

src/content/post/convnet-conv-layer.md renamed to src/content/post/convnet-conv-layer.mdx

Lines changed: 13 additions & 29 deletions
@@ -33,7 +33,7 @@ Alright, let's define our function:

```python
def conv_forward(X, W, b, stride=1, padding=1):
-pass
+    pass
```

Our conv layer will accept an input in `X: DxCxHxW` dimension, input filter `W: NFxCxHFxHW`, and bias `b: Fx1`, where:
@@ -65,25 +65,18 @@ To make the operation compatible, we will arrange our filter to `1x9`. Now, if w
Let's see the code for that.

```python
-
# Let this be 3x3 convolution with stride = 1 and padding = 1
-
# Suppose our X is 5x1x10x10, X_col will be a 9x500 matrix
-
X_col = im2col_indices(X, h_filter, w_filter, padding=padding, stride=stride)

# Suppose we have 20 of 3x3 filter: 20x1x3x3. W_col will be 20x9 matrix
-
W_col = W.reshape(n_filters, -1)

# 20x9 x 9x500 = 20x500
-
out = W_col @ X_col + b

# Reshape back from 20x500 to 5x20x10x10
-
# i.e. for each of our 5 images, we have 20 results with size of 10x10
-
out = out.reshape(n_filters, h_out, w_out, n_x)
out = out.transpose(3, 0, 1, 2)
```
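A quick shape sanity check for the hunk above. This is an illustrative sketch only, not part of the commit or of hipsternet; it just replays the post's example of a 5x1x10x10 input and 20 3x3 filters with stride 1 and padding 1, and the variable names are assumptions:

```python
# Illustrative arithmetic only; these names are assumptions, not hipsternet code.
n_x, c, h, w = 5, 1, 10, 10          # 5 images, 1 channel, 10x10 pixels
n_filters, h_filter, w_filter = 20, 3, 3
stride, padding = 1, 1

h_out = (h - h_filter + 2 * padding) // stride + 1   # 10
w_out = (w - w_filter + 2 * padding) // stride + 1   # 10

# im2col unrolls each 3x3 patch (9 values) into one column:
# 9 rows x (10 * 10 * 5) = 500 columns.
X_col_shape = (c * h_filter * w_filter, h_out * w_out * n_x)   # (9, 500)

# Each filter flattens to one row: 20x9, so W_col @ X_col is 20x500,
# i.e. 20 feature maps of 10x10 for each of the 5 images.
W_col_shape = (n_filters, c * h_filter * w_filter)             # (20, 9)
print(X_col_shape, W_col_shape, (h_out, w_out))
```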
@@ -110,17 +103,13 @@ Remember that the matrix we're dealing with, i.e. `dout` is a `5x20x10x10` matri
Next, we will compute the gradient of the the filters `dW`.

```python
-
# Transpose from 5x20x10x10 into 20x10x10x5, then reshape into 20x500
-
dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(n_filter, -1)

# 20x500 x 500x9 = 20x9
-
dW = dout_reshaped @ X_col.T

# Reshape back to 20x1x3x3
-
dW = dW.reshape(W.shape)
```

@@ -129,17 +118,13 @@ It's similar with the normal feed forward layer, except with more convoluted (ha
Lastly, the input gradient `dX`. We're almost there!

```python
-
# Reshape from 20x1x3x3 into 20x9
-
W_reshape = W.reshape(n_filter, -1)

# 9x20 x 20x500 = 9x500
-
dX_col = W_reshape.T @ dout_reshaped

# Stretched out image to the real image: 9x500 => 5x1x10x10
-
dX = col2im_indices(dX_col, X.shape, h_filter, w_filter, padding=padding, stride=stride)
```

@@ -150,12 +135,12 @@ Again, it's the same as feed forward layer with some careful reshaping! At the e
Here's the full source code for the forward and backward computation of the conv layer.

```python
-def conv*forward(X, W, b, stride=1, padding=1):
-cache = W, b, stride, padding
-n_filters, d_filter, h_filter, w_filter = W.shape
-n_x, d_x, h_x, w_x = X.shape
-h_out = (h_x - h_filter + 2 * padding) / stride + 1
-w*out = (w_x - w_filter + 2 * padding) / stride + 1
+def conv_forward(X, W, b, stride=1, padding=1):
+    cache = W, b, stride, padding
+    n_filters, d_filter, h_filter, w_filter = W.shape
+    n_x, d_x, h_x, w_x = X.shape
+    h_out = (h_x - h_filter + 2 * padding) / stride + 1
+    w_out = (w_x - w_filter + 2 * padding) / stride + 1

    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception('Invalid output dimension!')
@@ -174,8 +159,8 @@ w*out = (w_x - w_filter + 2 * padding) / stride + 1
    return out, cache

def conv_backward(dout, cache):
-X, W, b, stride, padding, X_col = cache
-n_filter, d_filter, h_filter, w_filter = W.shape
+    X, W, b, stride, padding, X_col = cache
+    n_filter, d_filter, h_filter, w_filter = W.shape

    db = np.sum(dout, axis=(0, 2, 3))
    db = db.reshape(n_filter, -1)
@@ -189,10 +174,9 @@ n_filter, d_filter, h_filter, w_filter = W.shape
    dX = col2im_indices(dX_col, X.shape, h_filter, w_filter, padding=padding, stride=stride)

    return dX, dW, db
-
```

-Also check out the complete code in my repository: <https://github.com/wiseodd/hipsternet>!
+Also check out the complete code in my repository: https://github.com/wiseodd/hipsternet!

## Conclusion

@@ -202,6 +186,6 @@ Dealing with multidimensional matrices as we will always encounter in convnet is

## References

-- <http://cs231n.github.io/convolutional-networks/>
-- <http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip>
-- <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>
+- http://cs231n.github.io/convolutional-networks/
+- http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip
+- http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
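For reference, the dimension check that the full listing above relies on can be exercised on its own. Here is a minimal, hypothetical helper (not part of the commit or of hipsternet) that reproduces the same arithmetic:

```python
def conv_output_dims(h_x, w_x, h_filter, w_filter, stride=1, padding=1):
    # Same formula as in conv_forward above; float division keeps the
    # .is_integer() check meaningful for incompatible shapes.
    h_out = (h_x - h_filter + 2 * padding) / stride + 1
    w_out = (w_x - w_filter + 2 * padding) / stride + 1
    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception('Invalid output dimension!')
    return int(h_out), int(w_out)

print(conv_output_dims(10, 10, 3, 3))                       # (10, 10): 3x3, stride 1, pad 1 preserves size
print(conv_output_dims(28, 28, 2, 2, stride=2, padding=0))  # (14, 14): the 2x2, stride-2 pooling case
```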

src/content/post/convnet-maxpool-layer.md renamed to src/content/post/convnet-maxpool-layer.mdx

Lines changed: 10 additions & 33 deletions
@@ -5,9 +5,11 @@ publishDate: 2016-07-18 02:00
tags: [machine learning, programming, python, neural networks]
---

+import BlogImage from "@/components/BlogImage.astro";
+
Traditionally, convnet consists of several layers: convolution, pooling, fully connected, and softmax. Although it's not true anymore with the recent development. A lot of things going on out there and the architecture of convent has been steadily (r)evolving, something like Google's Inception module found in [GoogLeNet](http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf) and the recent ImageNet champion: [ResNet](https://arxiv.org/pdf/1512.03385v1).

-Nevertheless, conv and pool layers are still the essential foundations of convnet. We've covered the conv layer in the [last post]({% post_url 2016-07-16-convnet-conv-layer %}). Now let's dig into pool layer, especially maxpool layer.
+Nevertheless, conv and pool layers are still the essential foundations of convnet. We've covered the conv layer in the last post. Now let's dig into pool layer, especially maxpool layer.

## Pool layer

1921

2022
Those are, mainly, the reduction of the dimensionality = less parameter = less computation burden; and slightly more robust model, because we're taking "high level views" of our images, the network will be slightly invariant towards small changes like rotation, translation, etc.
2123

22-
For more about theoritical and best practices about pool layer, head to CS231n lecture page: <http://cs231n.github.io/convolutional-networks/#pool>.
24+
For more about theoritical and best practices about pool layer, head to CS231n lecture page: http://cs231n.github.io/convolutional-networks/#pool.
2325

2426
## Maxpool layer
2527

@@ -32,39 +34,27 @@ It's just the same as conv layer with one exception: max instead of dot product.
3234
As we already know that maxpool layer is similar to conv layer, implementing it is somewhat easier.
3335

3436
```python
35-
3637
# Let say our input X is 5x10x28x28
37-
3838
# Our pooling parameter are: size = 2x2, stride = 2, padding = 0
39-
4039
# i.e. result of 10 filters of 3x3 applied to 5 imgs of 28x28 with stride = 1 and padding = 1
41-
4240
# First, reshape it to 50x1x28x28 to make im2col arranges it fully in column
43-
4441
X_reshaped = X.reshape(n \* d, 1, h, w)
4542

4643
# The result will be 4x9800
47-
4844
# Note if we apply im2col to our 5x10x28x28 input, the result won't be as nice: 40x980
49-
5045
X_col = im2col_indices(X_reshaped, size, size, padding=0, stride=stride)
5146

5247
# Next, at each possible patch location, i.e. at each column, we're taking the max index
53-
5448
max_idx = np.argmax(X_col, axis=0)
5549

5650
# Finally, we get all the max value at each column
57-
5851
# The result will be 1x9800
59-
6052
out = X_col[max_idx, range(max_idx.size)]
6153

6254
# Reshape to the output size: 14x14x5x10
63-
6455
out = out.reshape(h_out, w_out, n, d)
6556

6657
# Transpose to get 5x10x14x14 output
67-
6858
out = out.transpose(2, 3, 0, 1)
6959
```
7060

@@ -74,11 +64,11 @@ At above example, we could see how maxpool layer will reduce the computation for
7464

7565
For example, we have this single MNIST data of 28x28:
7666

77-
![Pool input]({{ site.baseurl }}/img/2016-07-18-convnet-maxpool-layer/pool_input.png)
67+
<BlogImage imagePath='/img/convnet-maxpool-layer/pool_input.png' />
7868

7969
After we fed the image to our maxpool layer, the result will look like this:
8070

81-
![Pool output]({{ site.baseurl }}/img/2016-07-18-convnet-maxpool-layer/pool_output.png)
71+
<BlogImage imagePath='/img/convnet-maxpool-layer/pool_output.png' />
8272

8373
## Maxpool backward
8474

@@ -87,39 +77,26 @@ Recall, how do we compute the gradient for ReLU layer. We let the gradient pass
8777
Maxpool layer is similar, because that's essentially what max operation do in backpropagation.
8878

8979
```python
90-
9180
# X_col and max_idx are the intermediate variables from the forward propagation step
92-
9381
# Suppose our output from forward propagation step is 5x10x14x14
94-
9582
# We want to upscale that back to 5x10x28x28, as in the forward step
96-
9783
# 4x9800, as in the forward step
98-
9984
dX_col = np.zeros_like(X_col)
10085

10186
# 5x10x14x14 => 14x14x5x10, then flattened to 1x9800
102-
10387
# Transpose step is necessary to get the correct arrangement
104-
10588
dout_flat = dout.transpose(2, 3, 0, 1).ravel()
10689

10790
# Fill the maximum index of each column with the gradient
108-
10991
# Essentially putting each of the 9800 grads
110-
11192
# to one of the 4 row in 9800 locations, one at each column
112-
11393
dX_col[max_idx, range(max_idx.size)] = dout_flat
11494

11595
# We now have the stretched matrix of 4x9800, then undo it with col2im operation
116-
11796
# dX would be 50x1x28x28
118-
11997
dX = col2im_indices(dX_col, (n \* d, 1, h, w), size, size, padding=0, stride=stride)
12098

12199
# Reshape back to match the input dimension: 5x10x28x28
122-
123100
dX = dX.reshape(X.shape)
124101
```
125102

@@ -133,7 +110,7 @@ We also see that doing maxpool with certain parameters, e.g. 2x2 maxpool with st
133110

134111
## References
135112

136-
- <http://cs231n.github.io/convolutional-networks/#pool>
137-
- <http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip>
138-
- <https://arxiv.org/pdf/1512.03385v1>
139-
- <http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf>
113+
- http://cs231n.github.io/convolutional-networks/#pool
114+
- http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip
115+
- https://arxiv.org/pdf/1512.03385v1
116+
- http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf

0 commit comments
