
Commit f8dbb6e

committed
modify : pyspellchecker
1 parent 32ed435 commit f8dbb6e

File tree

1 file changed: +30 -47 lines changed


beginner_source/Pretraining_Vgg_from_scratch.py

Lines changed: 30 additions & 47 deletions
@@ -1,5 +1,5 @@
 """
-Pretraining VGG from scratch
+``Pretraining`` VGG from scratch
 ============================
 
 
@@ -55,7 +55,7 @@
 # - We train the model from scratch using only the configuration
 # presented in the paper.
 #
-# - we do not use future method, like BatchNormalization,Adam , He
+# - we do not use future method, like Batch normalization,Adam , He
 # initialization.
 #
 # - You can apply to ImageNet Data.
@@ -68,15 +68,15 @@
 
 
 ######################################################################
-# Why Vgg is so popluar ?
+# Why VGG is so popular ?
 # -----------------------
 #
 
 
 ######################################################################
 # VGG became a model that attracted attention because it succeeded in
 # building deeper layers and dramatically shortening the training time
-# compared to alexNet, which was the sota model at the time.:
+# compared to alexnet, which was the SOTA model at the time.:
 #
 
 
@@ -91,12 +91,12 @@
 # this configuration will be explained below section.
 #
 
-DatasetName = 'Cifar' # Cifar ,Cifar10, Mnist , ImageNet
+DatasetName = 'Cifar' # CIFAR ,CIFAR10, MNIST , ImageNet
 
 ## model configuration
 
 num_classes = 100
-# CalTech 257 Cifar 100 Cifar10 10 ,Mnist 10 ImageNet 1000
+# Caltech 257 CIFAR 100 CIFAR10 10 ,MNIST 10 ImageNet 1000
 model_version = None ## you must configure it.
 
 ## data configuration
@@ -119,7 +119,7 @@
 
 update_count = int(256/batch_size)
 accum_step = int(256/batch_size)
-eval_step =26 * accum_step ## CalTech 5 Cifar 5 Mnist 6 , Cifar10 5 ImageNet 26
+eval_step =26 * accum_step ## Caltech 5 CIFAR 5 MNIST 6 , CIFAR10 5 ImageNet 26
 
 
 ## model configuration
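
For context, the constants in this hunk keep the paper's effective batch size of 256 regardless of the per-step batch size. A minimal sketch of the arithmetic, assuming batch_size = 64 (the tutorial sets batch_size in its data configuration):

# Sketch only: how update_count / accum_step relate to batch_size.
batch_size = 64
update_count = int(256 / batch_size)     # accumulate 4 mini-batches per optimizer step
accum_step = int(256 / batch_size)       # used to scale the loss before backward()
assert batch_size * update_count == 256  # matches the VGG paper's batch size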
@@ -147,9 +147,9 @@
 
 
 ######################################################################
-# We use ``CIFAR100`` Dataset in this tutorial. In Vgg paper , the authors
-# scales image istropically . Then , they apply
-# Normalization,RandomCrop,HorizontalFlip . So , we need to override
+# We use ``CIFAR100`` Dataset in this tutorial. In VGG paper , the authors
+# scales image isotropically . Then , they apply
+# Normalization,``RandomCrop``,``HorizontalFlip`` . So , we need to override
 # CIFAR100 class to apply preprocessing.
 #
 
@@ -168,8 +168,7 @@ def __init__(self,root,transform = None,multi=False,s_max=None,s_min=256,downloa
 A.Normalize(mean =(0.5071, 0.4867, 0.4408) , std = (0.2675, 0.2565, 0.2761)),
 A.SmallestMaxSize(max_size=self.S),
 A.RandomCrop(height =224,width=224),
-A.HorizontalFlip(),
-# A.RGBShift()
+A.HorizontalFlip()
 ]
 
 )
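
For context, the prose above says the CIFAR100 class is overridden so the Albumentations pipeline runs inside __getitem__. A hedged sketch of that idea, with an illustrative class name, a different transform order, and without the tutorial's extra options (multi, s_max handling):

# Sketch, not the tutorial's exact class: CIFAR-100 with VGG-style preprocessing.
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision import datasets

class VGGCifar100(datasets.CIFAR100):
    def __init__(self, root, train=True, s_min=256, download=True):
        super().__init__(root=root, train=train, download=download)
        self.vgg_transform = A.Compose([
            A.SmallestMaxSize(max_size=s_min),               # isotropic rescale of the shorter side
            A.RandomCrop(height=224, width=224),
            A.HorizontalFlip(),
            A.Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761)),
            ToTensorV2(),                                     # HWC uint8 numpy -> CHW float tensor
        ])

    def __getitem__(self, index):
        img, target = self.data[index], self.targets[index]  # img is an HWC uint8 numpy array
        img = self.vgg_transform(image=img)["image"]
        return img, target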
@@ -216,12 +215,12 @@ def __getitem__(self, index: int) :
 
 
 ######################################################################
-# | In Vgg paper, they do experiment over 6 models. model A is 11 layers,
-# model B is 13 layers, model C is 16 layers , model D is 16 laeyrs and
+# | In VGG paper, they do experiment over 6 models. model A is 11 layers,
+# model B is 13 layers, model C is 16 layers , model D is 16 layers and
 # model D is 19 layers . you can train all version of models to
 # reproduce VGG .
 # | ``Config_Channels`` means output channels and ``Config_kernels`` means
-# kerenl size.
+# kernel size.
 #
 
 import torch
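
For context, ``Config_Channels`` and ``Config_kernels`` drive how the convolutional stack is built; the tutorial's actual configuration values are not shown in this diff. Purely as an illustration of the idea, a hypothetical builder that consumes per-layer channel and kernel lists might look like:

# Hypothetical sketch only; the tutorial's real Config_Channels / Config_kernels differ.
import torch.nn as nn

def build_features(channels, kernels, in_channels=3):
    layers = []
    for out_ch, k in zip(channels, kernels):
        if out_ch == 'M':                      # 'M' marks a max-pooling stage in this sketch
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(in_channels, out_ch, kernel_size=k, padding=k // 2))
        layers.append(nn.ReLU(inplace=True))
        in_channels = out_ch
    return nn.Sequential(*layers)

# Example: a VGG-A-like prefix (64 -> pool -> 128 -> pool) with 3x3 kernels.
features = build_features([64, 'M', 128, 'M'], [3, 0, 3, 0])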
@@ -284,8 +283,7 @@ def __init__(self,version , num_classes):
 self.num_classes = num_classes
 self.linear_out = 4096
 self.xavier_count = xavier_count
-self.last_xavier= last_xavier ## if >0 , initialize last 3 fully connected noraml distribution
-# conv_1_by_1_3_outchannel = num_classes
+self.last_xavier= last_xavier ## if >0 , initialize last 3 fully connected normal distribution
 self.except_xavier = except_xavier
 
 super().__init__()
@@ -307,8 +305,6 @@ def __init__(self,version , num_classes):
 print('weight intialize end')
 def forward(self,x):
 x = self.feature_extractor(x)
-# x= self.avgpool(x) ## If Linear is output, use this
-# x= torch.flatten(x,start_dim = 1) ## If Linear is output, use this
 x = self.output_layer(x)
 x= self.avgpool(x)
 x= torch.flatten(x,start_dim = 1)
@@ -318,15 +314,12 @@ def forward(self,x):
 @torch.no_grad()
 def _init_weights(self,m):
 
-# print(m)
 if isinstance(m,nn.Conv2d):
 print('-------------')
 print(m.kernel_size)
 print(m.out_channels)
-# if (m.out_channels == self.num_classes or m.out_channels == self.linear_out) and self.last_xavier>0 :
 if self.last_xavier>0 and (self.except_xavier is None or self.last_xavier!=self.except_xavier):
 print('xavier')
-# self.last_xavier-=1
 nn.init.xavier_uniform_(m.weight)
 elif self.xavier_count >0 :
 print('xavier')
@@ -335,10 +328,8 @@ def _init_weights(self,m):
 else :
 std = 0.1
 print(f'normal std : {std}')
-
 torch.nn.init.normal_(m.weight,std=std)
-# if (m.out_channels == self.num_classes or m.out_channels == self.linear_out) :
-# self.last_xavier+=10
+
 self.last_xavier +=1
 if m.bias is not None :
 print('bias zero init')
@@ -361,21 +352,21 @@ def _init_weights(self,m):
 
 
 ######################################################################
-# When training Vgg , the authors first train model A , then initialized
+# When training VGG , the authors first train model A , then initialized
 # the weights of other models with the weights of model A. Waiting for
 # Model A to be trained takes a long time . The authors mention how to
-# train with xavier initialization rather than initializing with the
+# train with ``xavier`` initialization rather than initializing with the
 # weights of model A. But, they do not mention how to initialize .
 #
-# | To Reproduce Vgg , we use xavier initialization method to initialize
-# weights. We apply initialization to few first layes and last layers.
+# | To Reproduce VGG , we use ``xavier`` initialization method to initialize
+# weights. We apply initialization to few first layers and last layers.
 # Then , we apply random initialization to other layers.
-# | **we must fix stdandrad deviation to 0.1**. If standard deviation is
+# | **we must fix standard deviation to 0.1**. If standard deviation is
 # larger than 0.1, the weight get NAN values. For stability, we use 0.1
 # for standard deviation.
-# | The ``front_xavier`` means how many layers we initialize with xavier
+# | The ``front_xavier`` means how many layers we initialize with ``xavier``
 # initialization in front of layers and The ``last_xavier`` means how
-# many layers we initializae with xavier initialization in last of
+# many layers we initialize with ``xavier`` initialization in last of
 # layers.
 #
 # In My experiment, we can use ``front_xavier`` = 4 , ``last_xavier``\ =5
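
For context, a minimal sketch of the front/last initialization rule described above, assuming we simply count the model's Conv2d/Linear layers in order; the tutorial implements the same idea inside _init_weights with its xavier_count / last_xavier counters:

# Illustrative sketch of front/last Xavier initialization, not the tutorial's exact code.
import torch.nn as nn

def init_front_last_xavier(model, front_xavier=4, last_xavier=5, std=0.1):
    weighted = [m for m in model.modules() if isinstance(m, (nn.Conv2d, nn.Linear))]
    for i, m in enumerate(weighted):
        if i < front_xavier or i >= len(weighted) - last_xavier:
            nn.init.xavier_uniform_(m.weight)   # first and last few layers: Xavier
        else:
            nn.init.normal_(m.weight, std=std)  # middle layers: normal with std fixed at 0.1
        if m.bias is not None:
            nn.init.zeros_(m.bias)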
@@ -406,17 +397,15 @@ def accuracy(output, target, topk=(1,)):
 
 res = []
 for k in topk:
-# print(f'top {k}')
 correct_k = correct[:k].reshape(-1).float().sum(0,keepdim=True)
-# res.append(correct_k.mul_(100.0 / batch_size))
 res.append(correct_k)
 return res
 
 
 ######################################################################
 # we initiate model and loss function and optimizer and schedulers. In
-# vgg, they use softmax output ,Momentum Optimizer , and Scheduling based
-# on accuarcy.
+# VGG, they use softmax output ,Momentum Optimizer , and Scheduling based
+# on accuracy.
 #
 
 model = Model_vgg(model_version,num_classes)
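
For context, a hedged sketch of the optimizer and accuracy-driven schedule that comment describes, using the hyperparameters reported in the VGG paper (momentum 0.9, weight decay 5e-4, learning rate divided by 10 when validation accuracy stops improving); the tutorial's own instantiation may differ:

# Sketch: momentum SGD plus a scheduler keyed to validation accuracy.
criterion = torch.nn.CrossEntropyLoss()   # softmax + negative log-likelihood in one op
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1)
# after each evaluation: scheduler.step(val_top1_accuracy)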
@@ -440,9 +429,7 @@ def accuracy(output, target, topk=(1,)):
 [
 A.Normalize(mean =(0.5071, 0.4867, 0.4408) , std = (0.2675, 0.2565, 0.2761)),
 A.SmallestMaxSize(max_size=val_data.S),
-A.CenterCrop(height =224,width=224),
-# A.HorizontalFlip(),
-# A.RGBShift()
+A.CenterCrop(height =224,width=224)
 ]
 
 )
@@ -492,7 +479,6 @@ def accuracy(output, target, topk=(1,)):
 if i> 0 and i%update_count == 0 :
 print(f'Training steps : {i} parameter update loss :{total_loss} ')
 if grad_clip is not None:
-# print(f'Training steps : {i} parameter grad clip to {grad_clip}')
 torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
 optimizer.step()
 optimizer.zero_grad(set_to_none=True)
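
For context, the update branch above sits inside a gradient-accumulation loop. A minimal sketch of the surrounding pattern, with hypothetical names (train_loader, criterion, grad_clip) and the loss scaled by accum_step so the accumulated gradient matches one 256-image batch:

# Sketch of the accumulation + clipping pattern; names are illustrative.
for i, (images, targets) in enumerate(train_loader):
    loss = criterion(model(images), targets) / accum_step  # scale so grads sum to a full batch
    loss.backward()                                         # gradients accumulate across mini-batches
    if i > 0 and i % update_count == 0:
        if grad_clip is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)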
@@ -594,8 +580,7 @@ def __init__(self,root,transform = None,multi=False,s_max=None,s_min=256,split=N
 A.Normalize(),
 A.SmallestMaxSize(max_size=self.S),
 A.RandomCrop(height =224,width=224),
-A.HorizontalFlip(),
-# A.RGBShift()
+A.HorizontalFlip()
 ]
 
 )
@@ -644,17 +629,15 @@ def __getitem__(self, index: int) :
 [
 A.Normalize(),
 A.SmallestMaxSize(max_size=val_data.S),
-A.CenterCrop(height =224,width=224),
-# A.HorizontalFlip(),
-# A.RGBShift()
+A.CenterCrop(height =224,width=224)
 ]
 
 )
 
 ######################################################################
 # Conculsion
 # ----------
-# We have seen how pretraining VGG from scratch . This Tutorial will be helpful to reproduce another Foundation Model .
+# We have seen how ``pretraining`` VGG from scratch . This Tutorial will be helpful to reproduce another Foundation Model .
 
 ######################################################################
 # More things to try
@@ -668,5 +651,5 @@ def __getitem__(self, index: int) :
 # Further Reading
 # ---------------
 
-# - `VGG training using python script <https://github.com/woongjoonchoi/DeepLearningPaper-Reproducing/tree/master/Vgg>`__
+# - `VGG training using python script <https://github.com/woongjoonchoi/DeepLearningPaper-Reproducing/tree/master/VGG>`__
 # - `VGG paper <https://arxiv.org/abs/1409.1556>`__
