diff --git a/ActivationPrune.py b/ActivationPrune.py
new file mode 100644
index 0000000..a3a47b2
--- /dev/null
+++ b/ActivationPrune.py
@@ -0,0 +1,123 @@
+import copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+import time
+from model import *
+from train import *
+import random
+# from .model import ResNetBasicBlock
+
+from math import sqrt
+from Conv2dNew import Execution
+
+class Conv2dTest(nn.Conv2d):
+    def __init__(self,
+                 ratio,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 padding_mode='zeros',
+                 ):
+        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
+                                         bias, padding_mode)
+        self.ratio = ratio
+
+    def forward(self, input):
+        # Replace F.conv2d with the im2col-based convolution that prunes
+        # activation columns; the custom kernel assumes dilation=1 and groups=1.
+        E = Execution(self.ratio)
+        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
+        return output
+
+class LinearTest(nn.Linear):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 ):
+        super(LinearTest, self).__init__(in_features, out_features, bias)
+
+    def forward(self, input):
+        output = F.linear(input, self.weight, self.bias)
+        return output
+
+def prepare(model, ratio, inplace=False):
+    def addActivationPruneOp(module):
+        nonlocal layer_cnt
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                p_name = str(layer_cnt)
+                activationPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationPruneConv.bias = child.bias
+                activationPruneConv.weight = child.weight
+                module._modules[name] = activationPruneConv
+                layer_cnt += 1
+            elif isinstance(child, nn.Linear):
+                p_name = str(layer_cnt)
+                activationPruneLinear = LinearTest(
+                    child.in_features, child.out_features,
+                    bias=(child.bias is not None)
+                )
+                if child.bias is not None:
+                    activationPruneLinear.bias = child.bias
+                activationPruneLinear.weight = child.weight
+                module._modules[name] = activationPruneLinear
+                layer_cnt += 1
+            else:
+                addActivationPruneOp(child)  # recurse into submodules; layers such as MaxPool are left unchanged
+    layer_cnt = 0
+    if not inplace:
+        model = copy.deepcopy(model)
+    addActivationPruneOp(model)  # attach the activation-pruning op to every Conv2d/Linear layer
+    return model
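+
+# A minimal usage sketch (illustrative only): `prepare` returns a deep copy of the
+# model in which every Conv2d runs the pruned im2col convolution. The weight path
+# and ratio mirror main.py; the random test tensor is an assumption for the demo.
+#
+#   model = getLeNet()
+#   model.load_state_dict(torch.load('./pth/LeNet/LeNet.pth'))
+#   pruned = prepare(model, ratio=0.4)            # the original `model` is left untouched
+#   logits = pruned(torch.randn(1, 1, 32, 32))    # Conv2dTest layers now prune activations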
+
+def getPruneModel(model_name, weight_file_path, pattern, ratio):
+    if model_name == 'LeNet':
+        model_orign = getLeNet()  # build the original model skeleton
+    elif model_name == 'AlexNet':
+        model_orign = getAlexnet()
+
+    if pattern == 'test':
+        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the skeleton
+    # Convert the original model into the pruned one, i.e. add the activation-pruning
+    # op to every convolution and linear layer
+    activationPruneModel = prepare(model_orign, ratio)
+
+    return activationPruneModel
+
+def activationPruneModelOp(model_name, weight_file_path, batch_size, img_size, pattern, ratio):
+    '''
+    :param model_name: name of the model to run
+    :param weight_file_path: path to the weight file
+    :param batch_size: batch size used during training/evaluation
+    :param img_size: image size required by the dataset pipeline
+    :param pattern: run mode, either training ('train') or inference ('test')
+    :param ratio: activation-pruning ratio (0 disables pruning)
+    :return:
+    '''
+    if model_name == 'VGG16' or model_name == 'AlexNet' or model_name == 'ResNet' or model_name == 'vgg16_thu' or model_name == 'SqueezeNet':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batch_size, pth_path='./data',
+                                                  img_size=img_size)  # pick the dataset matching the model
+    elif model_name == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batch_size, path='./data',
+                                                img_size=img_size)
+
+    activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+    criterion = nn.CrossEntropyLoss()
+    if pattern == 'train':
+        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion, optimizer=optimizer, name='SqueezeNet_5',
+                          scheduler=scheduler, num_epochs=30, rerun=False)  # run the training loop
+    elif pattern == 'test':
+        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
diff --git a/Conv2dNew.py b/Conv2dNew.py
new file mode 100644
index 0000000..0513a0f
--- /dev/null
+++ b/Conv2dNew.py
@@ -0,0 +1,181 @@
+import math
+import numpy as np
+import copy
+import torch
+
+def determine_padding(filter_shape, output_shape="same"):
+    '''
+    :param filter_shape: tuple (filter_height, filter_width)
+    :param output_shape: "valid", "same", or an explicit (pad_h, pad_w) pair as used by nn.Conv2d
+    :return: ((pad_h1, pad_h2), (pad_w1, pad_w2)) padding for top/bottom and left/right
+    '''
+    # No padding
+    if output_shape == "valid":
+        return (0, 0), (0, 0)
+    # Pad so that the output shape is the same as input shape (given that stride=1)
+    elif output_shape == "same":
+        filter_height, filter_width = filter_shape
+
+        # Derived from:
+        # output_height = (height + pad_h - filter_height) / stride + 1
+        # In this case output_height = height and stride = 1. This gives the
+        # expression for the padding below.
+        pad_h1 = int(math.floor((filter_height - 1) / 2))
+        pad_h2 = int(math.ceil((filter_height - 1) / 2))
+        pad_w1 = int(math.floor((filter_width - 1) / 2))
+        pad_w2 = int(math.ceil((filter_width - 1) / 2))
+    else:
+        # Explicit symmetric padding, e.g. the (pad_h, pad_w) tuple stored by nn.Conv2d
+        pad_h1 = output_shape[0]
+        pad_h2 = output_shape[0]
+        pad_w1 = output_shape[1]
+        pad_w2 = output_shape[1]
+
+    return (pad_h1, pad_h2), (pad_w1, pad_w2)
+
+def image_to_column(images, filter_shape, stride, output_shape='same'):
+    filter_height, filter_width = filter_shape
+    pad_h, pad_w = determine_padding(filter_shape, output_shape)
+    # Add padding to the image; F.pad takes (left, right, top, bottom) for a 4-D tensor
+    images_padded = torch.nn.functional.pad(images, [pad_w[0], pad_w[1], pad_h[0], pad_h[1]], mode='constant')
+    # Calculate the indices where the dot products are to be applied between weights
+    # and the image
+    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)
+
+    # Get content from image at those indices
+    cols = images_padded[:, k, i, j]
+    channels = images.shape[1]
+    # Reshape content into column shape
+    cols = cols.permute(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
+
+    return cols
+
+def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
+    # First figure out what the size of the output should be
+    batch_size, channels, height, width = images_shape
+    filter_height, filter_width = filter_shape
+    pad_h, pad_w = padding
+    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
+    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)
+
+    i0 = np.repeat(np.arange(filter_height), filter_width)
+    i0 = np.tile(i0, channels)
+    i1 = stride * np.repeat(np.arange(out_height), out_width)
+    j0 = np.tile(np.arange(filter_width), filter_height * channels)
+    j1 = stride * np.tile(np.arange(out_width), out_height)
+    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
+    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
+    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)
+    return (k, i, j)
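+
+# Shape sketch (illustrative): for a batch of one 3x4x4 image, a 3x3 filter and
+# 'same' padding, out_height = out_width = 4, so image_to_column returns a
+# (3*3*3, 4*4*1) = (27, 16) matrix -- one column per output position.
+#
+#   x = torch.arange(48, dtype=torch.float32).reshape(1, 3, 4, 4)
+#   cols = image_to_column(x, (3, 3), stride=1, output_shape='same')
+#   assert cols.shape == (27, 16)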
method """ + self.input_shape = shape + + def layer_name(self): + """ The name of the layer. Used in model summary. """ + return self.__class__.__name__ + + def parameters(self): + """ The number of trainable parameters used by the layer """ + return 0 + + def forward_pass(self, X, training): + """ Propogates the signal forward in the network """ + raise NotImplementedError() + + def backward_pass(self, accum_grad): + """ Propogates the accumulated gradient backwards in the network. + If the has trainable weights then these weights are also tuned in this method. + As input (accum_grad) it receives the gradient with respect to the output of the layer and + returns the gradient with respect to the output of the previous layer. """ + raise NotImplementedError() + + def output_shape(self): + """ The shape of the output produced by forward_pass """ + raise NotImplementedError() + +class Execution(Layer): + """A 2D Convolution Layer. + Parameters: + ----------- + n_filters: int + The number of filters that will convolve over the input matrix. The number of channels + of the output shape. + filter_shape: tuple + A tuple (filter_height, filter_width). + input_shape: tuple + The shape of the expected input of the layer. (batch_size, channels, height, width) + Only needs to be specified for first layer in the network. + padding: string + Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width + matches the input height and width. For 'valid' no padding is added. + stride: int + The stride length of the filters during the convolution over the input. + """ + def __init__(self,ratio): + self.ratio = ratio + pass + + def conv2d(self, input,weight,bias,stride,padding): + self.input = input + self.weight = weight + self.bias = bias + self.stride = stride + self.padding = padding + + self.n_filters = self.weight.shape[0] # 卷积核的个数 + self.filter_shape = (self.weight.shape[2], self.weight.shape[3]) + self.input_shape = [self.input.shape[1],self.input.shape[2],self.input.shape[3]] + self.trainable = False + + batch_size, channels, height, width = self.input.shape + # Turn image shape into column shape + # (enables dot product between input and weights) + self.X_col = image_to_column(self.input, self.filter_shape, stride=self.stride[0], output_shape=self.padding) + # Turn weights into column shape + if self.ratio != 0: + compareRatio = math.ceil(self.ratio * self.X_col.shape[0]) + self.X_col = self.activationSlidePrune(self.X_col,compareRatio) + self.W_col = self.weight.reshape((self.n_filters, -1)) + # Calculate output + output = torch.einsum('ij,jk->ik',self.W_col,self.X_col) + torch.unsqueeze(self.bias,1) + # Reshape into (n_filters, out_height, out_width, batch_size) + output = output.reshape(self.output_shape() + (batch_size, )) + # Redistribute axises so that batch size comes first + return output.permute(3,0,1,2) + + def output_shape(self): + channels, height, width = self.input_shape + pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding) + output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1 + output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[0] + 1 + return self.n_filters, int(output_height), int(output_width) + + def parameters(self): + return np.prod(self.W.shape) + np.prod(self.w0.shape) + + def activationSlidePrune(self,input,compareRatio): + matrixOne = torch.ones(input.shape,device='cuda:0') + andOp = torch.logical_and(matrixOne,input) + andSum = torch.sum(andOp,dim=0) + 
+
+    def output_shape(self):
+        channels, height, width = self.input_shape
+        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
+        # A single stride value is used for both axes (square stride assumed)
+        output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1
+        output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[0] + 1
+        return self.n_filters, int(output_height), int(output_width)
+
+    def parameters(self):
+        # number of parameters used by this convolution (weights + bias)
+        n = int(np.prod(self.weight.shape))
+        if self.bias is not None:
+            n += int(np.prod(self.bias.shape))
+        return n
+
+    def activationSlidePrune(self, input, compareRatio):
+        # Count the nonzero entries of every im2col column
+        andSum = torch.sum(input != 0, dim=0)
+        # Zero every column whose nonzero count is at most the threshold;
+        # columns that are already all zero are left as they are.
+        pruneMask = (andSum <= compareRatio) & (andSum != 0)
+        input[:, pruneMask] = 0
+        # pruning statistics, if needed:
+        # pruneMask.float().mean() is the fraction of columns pruned here,
+        # (andSum == 0).float().mean() the fraction that was already all zero
+        return input
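+
+# Worked example (illustrative): with ratio = 0.4 and an X_col of 25 rows, the
+# threshold is ceil(0.4 * 25) = 10, so any column with 1..10 nonzero entries is
+# zeroed while denser columns pass through unchanged. Shapes match nn.Conv2d:
+#
+#   E = Execution(0.4)
+#   x = torch.randn(1, 1, 32, 32)
+#   w = torch.randn(6, 1, 5, 5)
+#   b = torch.zeros(6)
+#   y = E.conv2d(x, w, b, stride=(1, 1), padding=(0, 0))
+#   assert y.shape == (1, 6, 28, 28)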
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..18729c0
--- /dev/null
+++ b/main.py
@@ -0,0 +1,11 @@
+from ActivationPrune import *
+
+
+if __name__ == '__main__':
+    weight_file_path = './pth/LeNet/LeNet.pth'
+    model_name = 'LeNet'
+    batch_size = 64
+    img_size = 32
+    pattern = 'test'  # pattern='test' or pattern='train'
+    ratio = 0.4  # ratio=0 disables pruning and is used for the training mode
+    activationPruneModelOp(model_name, weight_file_path, batch_size, img_size, pattern, ratio)
\ No newline at end of file
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..d79cd91
--- /dev/null
+++ b/model.py
@@ -0,0 +1,123 @@
+from collections import OrderedDict
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+import torch.nn.functional as F
+import math
+import torch
+
+class ThuAlexNet(nn.Module):
+    def __init__(self, num_classes=10, drop_rate=0.5):
+        super(ThuAlexNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(64, 192, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(192, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(drop_rate),
+            # nn.Linear(256 * 6 * 6, 4096),
+            nn.Linear(1024, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(drop_rate),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_classes),
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+class AlexNet(nn.Module):
+
+    def __init__(self, num_classes=10):
+        super(AlexNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(96, 256, kernel_size=5, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(256, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(),
+            nn.Linear(256 * 6 * 6, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_classes),
+            # nn.Softmax()
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), 256 * 6 * 6)
+        x = self.classifier(x)
+        return x
+
+class LeNet(nn.Module):
+    def __init__(self, num_classes=10):
+        super(LeNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(1, 6, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(6, 16, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(16, 120, kernel_size=5),
+            nn.ReLU(inplace=True)
+        )
+        self.classifier = nn.Sequential(
+            nn.Linear(120, 84),
+            nn.ReLU(inplace=True),
+            nn.Linear(84, num_classes)
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+def getLeNet(num_classes=10):
+    model = LeNet(num_classes)
+    return model
+
+def getAlexnet(num_classes=10):
+    model = AlexNet(num_classes)
+    return model
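+
+# Shape check (illustrative), assuming the 32x32 inputs configured in main.py:
+# LeNet's features map 1x32x32 -> 6x28x28 -> 6x14x14 -> 16x10x10 -> 16x5x5 ->
+# 120x1x1, so view() flattens to 120 features, matching the Linear(120, 84) layer.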
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..4135d5f
--- /dev/null
+++ b/train.py
@@ -0,0 +1,283 @@
+from __future__ import print_function, division
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.optim import lr_scheduler
+import numpy as np
+import torchvision
+from torchvision import datasets, models, transforms
+import matplotlib.pyplot as plt
+import time
+import os
+import copy
+from tqdm import tqdm
+from collections import OrderedDict
+
+def download_mnist(save_path):
+    torchvision.datasets.MNIST(root=save_path, train=True, download=True)
+    torchvision.datasets.MNIST(root=save_path, train=False, download=True)
+    return save_path
+
+def load_mnist(batch_size=64, path='', img_size=32):
+    # Resize to img_size; MNIST needs no further augmentation here
+    transform = transforms.Compose(
+        [transforms.Resize((img_size, img_size)),
+         transforms.ToTensor()])
+    test_transform = transforms.Compose(
+        [transforms.Resize((img_size, img_size)),
+         transforms.ToTensor()])
+    trainset = torchvision.datasets.MNIST(root=path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.MNIST(root=path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 60000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar10(save_path):
+    torchvision.datasets.CIFAR10(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR10(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar10(batch_size=64, pth_path='./data', img_size=32):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    else:
+        # Standard CIFAR-10 augmentation at the native 32x32 resolution
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR10(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar100(save_path):
+    torchvision.datasets.CIFAR100(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR100(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar100(batch_size, pth_path, img_size):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    else:
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR100(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR100(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def test_model(model, dataloaders, dataset_sizes, criterion):
+    print("validation model:")
+    phase = "val"
+    model.cuda()
+    model.eval()
+    with torch.no_grad():
+        running_loss = 0.0
+        running_acc = 0.0
+        for inputs, labels in tqdm(dataloaders[phase]):
+            inputs, labels = inputs.cuda(), labels.cuda()
+            outputs = model(inputs)
+            _, preds = torch.max(outputs, 1)
+            loss = criterion(outputs, labels)
+            running_loss += loss.item() * inputs.size(0)
+            running_acc += torch.sum(preds == labels.data)
+        epoch_loss = running_loss / dataset_sizes[phase]
+        epoch_acc = running_acc / dataset_sizes[phase]
+        epoch_acc = epoch_acc.item()
+        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+            phase, epoch_loss, epoch_acc))
+    return epoch_acc, epoch_loss
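+
+# Usage sketch (illustrative; assumes MNIST has already been downloaded to ./data
+# and that a trained model instance is available):
+#   dataloaders, dataset_sizes = load_mnist(batch_size=64, path='./data', img_size=32)
+#   acc, loss = test_model(model, dataloaders, dataset_sizes, nn.CrossEntropyLoss())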
+
+def train_model_jiang(model, dataloaders, dataset_sizes, criterion, optimizer, name, scheduler=None, num_epochs=100, rerun=False):
+    since = time.time()
+    start_epoch = 0
+    if rerun:
+        # Resume training from the epoch-20 checkpoint
+        print('resuming from ./test_20.pth')
+        print(num_epochs)
+        model.load_state_dict(torch.load('./test_20.pth'))
+        start_epoch = 20
+
+    best_model_wts = copy.deepcopy(model.state_dict())
+    best_acc = 0.0
+
+    model.cuda()
+    for epoch in range(start_epoch, num_epochs):
+        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
+        print('-' * 10)
+        print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr']))
+
+        # Each epoch has a training and validation phase
+        for phase in ['train', 'val']:
+            if phase == 'train':
+                model.train()  # Set model to training mode
+            else:
+                print('val stage')
+                model.eval()  # Set model to evaluate mode
+
+            running_loss = 0.0
+            running_corrects = 0
+
+            # Iterate over data.
+            i = 0
+            for data in dataloaders[phase]:
+                inputs, labels = data
+                inputs = inputs.cuda()
+                labels = labels.cuda()
+
+                # zero the parameter gradients
+                optimizer.zero_grad()
+
+                # forward
+                # track history only if in train
+                with torch.set_grad_enabled(phase == 'train'):
+                    outputs = model(inputs)
+                    _, preds = torch.max(outputs, 1)
+                    loss = criterion(outputs, labels)
+                    print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss.item()))
+                    i += 1
+                    # backward + optimize only if in training phase
+                    if phase == 'train':
+                        loss.backward()
+                        optimizer.step()
+
+                # statistics
+                running_loss += loss.item() * inputs.size(0)
+                running_corrects += torch.sum(preds == labels.data)
+            if phase == 'train' and scheduler is not None:
+                scheduler.step()
+
+            epoch_loss = running_loss / dataset_sizes[phase]
+            epoch_acc = running_corrects.double() / dataset_sizes[phase]
+
+            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+                phase, epoch_loss, epoch_acc))
+
+            # deep copy the model and checkpoint whenever validation accuracy improves
+            if phase == 'val' and epoch_acc > best_acc:
+                best_acc = epoch_acc
+                best_model_wts = copy.deepcopy(model.state_dict())
+                model.load_state_dict(best_model_wts)
+                path = './test_{}.pth'.format(epoch + 1)
+                torch.save(model.state_dict(), path)
+
+    time_elapsed = time.time() - since
+    print('Training complete in {:.0f}m {:.0f}s'.format(
+        time_elapsed // 60, time_elapsed % 60))
+    print('Best val Acc: {:4f}'.format(best_acc))
+
+    # load best model weights and save the final checkpoint
+    model.load_state_dict(best_model_wts)
+    path = './best.pth' if rerun else './pth/Lenet/LeNet.pth'
+    torch.save(model.state_dict(), path)
+    return model
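+
+# Usage sketch (illustrative; the argument values mirror ActivationPrune.py):
+#   optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+#   scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)
+#   train_model_jiang(model, dataloaders, dataset_sizes, criterion=nn.CrossEntropyLoss(),
+#                     optimizer=optimizer, name='LeNet', scheduler=scheduler,
+#                     num_epochs=30, rerun=False)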
diff --git "a/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md" "b/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md"
new file mode 100644
index 0000000..c761fb1
--- /dev/null
+++ "b/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md"
@@ -0,0 +1,23 @@
+# I. main.py
+## 1. main function parameters
+1. weight\_file\_path - path of the weight file to read.
+2. model_name - name of the model to run.
+3. batch_size - number of images processed per batch.
+4. img_size - size that each image is resized to for the dataset.
+5. pattern - run mode:
+   (1) pattern='test' measures the accuracy after the input feature maps are pruned; no model training is involved.
+   (2) pattern='train' trains the initial model; the input feature maps are not pruned, so the next parameter, ratio, should be set to 0.
+6. ratio - pruning rate. After the input feature map is unfolded into an m*n matrix, each column is pruned individually, and ratio determines the pruning threshold. With ratio=0.2, for example, any column whose number of nonzero values is <= ratio\*m is pruned, i.e. its data is zeroed out.
+# II. Conv2dNew.py
+## 1. Purpose of Conv2dNew.py
+This design currently focuses on pruning the input feature maps of the convolution layers. In the original training code, the img2col unfolding of the input feature map is implemented in C and invoked as low-level code, so the convolution had to be re-implemented here, i.e. the F.conv2d() function is reproduced.
+## 2. The determine_padding function
+Pads the original input feature map according to the given padding mode or padding values.
+## 3. The image\_to_column function
+Performs the img2col unfolding of the input feature map.
+## 4. The get_im2col_indices function
+Helper that determines the size of the matrix produced by img2col of the input feature map.
+## 5. The Layer class
+Layer is a generic base class; different layers, such as convolution, pooling and batch normalization, can be implemented by inheriting from it.
+## 6. The Execution class
+This class holds the per-layer operations; here it mainly implements the convolution, via its conv2d method.
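+## 7. Worked example of the pruning threshold
+A minimal sketch of the column-pruning rule (the numbers are illustrative, not taken from the code). For ratio=0.4 and an unfolded matrix with m=25 rows, the threshold is ceil(0.4\*25)=10, so every column with 1 to 10 nonzero entries is zeroed:
+```python
+import math
+import torch
+
+ratio = 0.4
+X_col = torch.randn(25, 16)        # m=25 rows, one column per output position
+X_col[5:, :8] = 0                  # make the first 8 columns sparse (5 nonzeros each)
+threshold = math.ceil(ratio * X_col.shape[0])    # ceil(0.4 * 25) = 10
+nonzeros = (X_col != 0).sum(dim=0)
+X_col[:, (nonzeros <= threshold) & (nonzeros != 0)] = 0
+assert (X_col[:, :8] == 0).all()   # sparse columns pruned; dense ones kept
+```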