diff --git a/ActivationPrune.py b/ActivationPrune.py new file mode 100644 index 0000000..f0a3c98 --- /dev/null +++ b/ActivationPrune.py @@ -0,0 +1,139 @@ +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +import time +from model import * +from train import * +import random +# from .model import ResNetBasicBlock + +from math import sqrt +import copy +from time import time +from Conv2dNew import Execution + + + +class Conv2dTest(nn.Conv2d): + def __init__(self, + ratio, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + padding_mode='zeros', + ): + super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, + bias, padding_mode) + self.ratio = ratio + def forward(self, input): + E = Execution(self.ratio) + output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding) + return output + +class LinearTest(nn.Linear): + def __init__(self, + in_features, + out_features, + bias=True, + ): + super(LinearTest, self).__init__(in_features, out_features, bias) + + def forward(self, input): + output = F.linear(input, self.weight, self.bias) + return output + +def prepare(model, ratio,inplace=False): + # move intpo prepare + def addActivationPruneOp(module): + nonlocal layer_cnt + for name, child in module.named_children(): + if isinstance(child, nn.Conv2d): + p_name = str(layer_cnt) + activationPruneConv = Conv2dTest( + ratio, + child.in_channels, + child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding, + dilation=child.dilation, groups=child.groups, bias=(child.bias is not None), + padding_mode=child.padding_mode + ) + if child.bias is not None: + activationPruneConv.bias = child.bias + activationPruneConv.weight = child.weight + module._modules[name] = activationPruneConv + layer_cnt += 1 + elif isinstance(child, nn.Linear): + p_name = str(layer_cnt) + activationPruneLinear = LinearTest( + child.in_features, child.out_features, + bias=(child.bias is not None) + ) + if child.bias is not None: + activationPruneLinear.bias = child.bias + activationPruneLinear.weight = child.weight + module._modules[name] = activationPruneLinear + layer_cnt += 1 + else: + addActivationPruneOp(child) # 这是用来迭代的,Maxpool层的功能是不变的 + layer_cnt = 0 + if not inplace: + model = copy.deepcopy(model) + addActivationPruneOp( model) # 为每一个卷积层添加输入特征图剪枝操作 + return model + +def getModel(modelName): + if modelName == 'LeNet': + return getLeNet() # 加载原始模型框架 + elif modelName == 'AlexNet': + return getAlexnet() + elif modelName == 'VGG16': + return get_vgg16() + elif modelName == 'SqueezeNet': + return get_squeezenet() + elif modelName == 'ResNet': + return get_resnet18() + elif modelName == 'InceptionV3': + return get_inception_v3() + # if modelName == 'MobileNet': + # return mobilenetv3_large() + +def getDataSet(modelName,batchSize,imgSize): + if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet' or modelName=='InceptionV3': + dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data', + img_size=imgSize) # 确定数据集 + elif modelName == 'LeNet': + dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize) + + return dataloaders,dataset_sizes + +def getPruneModel(model_name, weight_file_path,pattern,ratio): + model_orign = getModel(model_name) + if pattern == 'test' or pattern == 'retrain': + 
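+        # Note: pretrained weights are only loaded for 'test' and 'retrain';
+        # pattern == 'train' keeps the randomly initialized model.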
model_orign.load_state_dict(torch.load(weight_file_path)) # 原始模型框架加载模型信息 + activationPruneModel = prepare(model_orign,ratio) + + return activationPruneModel + +def activationPruneModelOp(model_name, batch_size, img_size,pattern,ratio,epoch): + dataloaders, dataset_sizes = getDataSet(model_name, batch_size, img_size) + criterion = nn.CrossEntropyLoss() + + if pattern == 'retrain' or pattern == 'train': + weight_file_path = './pth/' + model_name + '/ratio=0'+ '/Activation' + '/best.pth' + activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio) + optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8) # 设置学习率下降策略 + train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, ratio, 'activation',pattern, criterion=criterion,optimizer=optimizer, name=model_name, + scheduler=scheduler, num_epochs=epoch, rerun=False) # 进行模型的训练 + if pattern == 'test': + weight_file_path = './pth/' + model_name + '/ratio=' + str(ratio) + '/Activation/' + 'best.pth' + activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio) + test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion) + + diff --git a/ActivationPrune.xlsx b/ActivationPrune.xlsx new file mode 100644 index 0000000..e053805 Binary files /dev/null and b/ActivationPrune.xlsx differ diff --git a/Conv2dNew.py b/Conv2dNew.py new file mode 100644 index 0000000..88f6ea4 --- /dev/null +++ b/Conv2dNew.py @@ -0,0 +1,197 @@ +import math +import numpy as np +import copy +import torch +def determine_padding(filter_shape, output_shape="same"): + ''' + :param filter_shape: + :param output_shape: + :return: + ''' + # No padding + if output_shape == "valid": + return (0, 0), (0, 0) + # Pad so that the output shape is the same as input shape (given that stride=1) + elif output_shape == "same": + filter_height, filter_width = filter_shape + + # Derived from: + # output_height = (height + pad_h - filter_height) / stride + 1 + # In this case output_height = height and stride = 1. This gives the + # expression for the padding below. 
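+        # Worked example: a 3x3 filter gives floor(2/2) = ceil(2/2) = 1 pixel of
+        # padding per side, so with stride 1 the output size equals the input.
+        # For even filter sizes the floor/ceil pair below splits the padding
+        # unevenly (e.g. a 4-wide filter pads 1 on one side and 2 on the other).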
+ pad_h1 = int(math.floor((filter_height - 1)/2)) + pad_h2 = int(math.ceil((filter_height - 1)/2)) + pad_w1 = int(math.floor((filter_width - 1)/2)) + pad_w2 = int(math.ceil((filter_width - 1)/2)) + else: + pad_h1 = output_shape[0] + pad_h2 = output_shape[0] + pad_w1 = output_shape[1] + pad_w2 = output_shape[1] + + return (pad_h1, pad_h2), (pad_w1, pad_w2) + +def image_to_column(images, filter_shape, stride, output_shape='same'): + filter_height, filter_width = filter_shape + pad_h, pad_w = determine_padding(filter_shape, output_shape)# Add padding to the image + images_padded = torch.nn.functional.pad(images, [pad_h[0],pad_h[0],pad_w[0],pad_w[1]], mode='constant')# Calculate the indices where the dot products are to be applied between weights + # and the image + k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride) + + # Get content from image at those indices + cols = images_padded[:, k, i, j] + channels = images.shape[1] + # Reshape content into column shape + # cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1) + cols = cols.permute(1, 2, 0).reshape(filter_height * filter_width * channels, -1) + + return cols + +def get_im2col_indices(images_shape, filter_shape, padding, stride=1): + # First figure out what the size of the output should be + batch_size, channels, height, width = images_shape + filter_height, filter_width = filter_shape + pad_h, pad_w = padding + out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1) + out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1) + + i0 = np.repeat(np.arange(filter_height), filter_width) + i0 = np.tile(i0, channels) + i1 = stride * np.repeat(np.arange(out_height), out_width) + j0 = np.tile(np.arange(filter_width), filter_height * channels) + j1 = stride * np.tile(np.arange(out_width), out_height) + i = i0.reshape(-1, 1) + i1.reshape(1, -1) + j = j0.reshape(-1, 1) + j1.reshape(1, -1) + k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1) + return (k, i, j) + +class Layer(object): + + def set_input_shape(self, shape): + """ Sets the shape that the layer expects of the input in the forward + pass method """ + self.input_shape = shape + + def layer_name(self): + """ The name of the layer. Used in model summary. """ + return self.__class__.__name__ + + def parameters(self): + """ The number of trainable parameters used by the layer """ + return 0 + + def forward_pass(self, X, training): + """ Propogates the signal forward in the network """ + raise NotImplementedError() + + def backward_pass(self, accum_grad): + """ Propogates the accumulated gradient backwards in the network. + If the has trainable weights then these weights are also tuned in this method. + As input (accum_grad) it receives the gradient with respect to the output of the layer and + returns the gradient with respect to the output of the previous layer. """ + raise NotImplementedError() + + def output_shape(self): + """ The shape of the output produced by forward_pass """ + raise NotImplementedError() + +class Execution(Layer): + """A 2D Convolution Layer. + Parameters: + ----------- + n_filters: int + The number of filters that will convolve over the input matrix. The number of channels + of the output shape. + filter_shape: tuple + A tuple (filter_height, filter_width). + input_shape: tuple + The shape of the expected input of the layer. (batch_size, channels, height, width) + Only needs to be specified for first layer in the network. 
+ padding: string + Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width + matches the input height and width. For 'valid' no padding is added. + stride: int + The stride length of the filters during the convolution over the input. + """ + def __init__(self,ratio): + self.ratio = ratio + pass + + def conv2d(self, input,weight,bias,stride,padding): + self.input = input + self.weight = weight + self.bias = bias + self.stride = stride + self.padding = padding + + self.n_filters = self.weight.shape[0] # 卷积核的个数 + self.filter_shape = (self.weight.shape[2], self.weight.shape[3]) + self.input_shape = [self.input.shape[1],self.input.shape[2],self.input.shape[3]] + self.trainable = False + + batch_size, channels, height, width = self.input.shape + # Turn image shape into column shape + # (enables dot product between input and weights) + self.X_col = image_to_column(self.input, self.filter_shape, stride=self.stride[0], output_shape=self.padding) + # Turn weights into column shape + if self.ratio != 0: + compareRatio = math.ceil(self.ratio * self.X_col.shape[0]) + self.X_col = self.activationSlidePrune(self.X_col,compareRatio) + self.W_col = self.weight.reshape((self.n_filters, -1)) + # Calculate output + if self.bias is not None: + output = torch.einsum('ij,jk->ik',self.W_col,self.X_col) + (torch.unsqueeze(self.bias,1) ) + else: + output = torch.einsum('ij,jk->ik', self.W_col, self.X_col) + # Reshape into (n_filters, out_height, out_width, batch_size) + output = output.reshape(self.output_shape() + (batch_size, )) + # Redistribute axises so that batch size comes first + return output.permute(3,0,1,2) + + + def output_shape(self): + channels, height, width = self.input_shape + pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding) + output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1 + output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[0] + 1 + return self.n_filters, int(output_height), int(output_width) + + def parameters(self): + return np.prod(self.W.shape) + np.prod(self.w0.shape) + + def compressionRateStatistics(self,input,andSum,compareRatio): + pruneNumber = 0 + zerosNumber = 0 + for i in range(input.shape[1]): + if andSum[i] == 0: + zerosNumber += 1 + if andSum[i] != 0 and andSum[i] <= compareRatio: + pruneNumber += 1 + print('pruneNumberRatio=', pruneNumber / (input.shape[1])) + print('zerosNumberRatio=', zerosNumber / (input.shape[1])) + + def accuracyTest(self,andSum): + for i in range(len(andSum)): + print(i,andSum[i]) + + def activationSlidePrune(self,input,compareRatio): + matrixOne = torch.ones(input.shape,device='cuda:0') + + x = torch.clone(torch.detach(input)) + andOp = torch.logical_and(matrixOne,x) + andSum = torch.sum(andOp,dim=0) + + # self.compressionRateStatistics(input,andSum,compareRatio) + # self.accuracyTest(andSum) + + x1 = x.permute(1,0) + x1[(andSum<=compareRatio),] = 0 + x = x1.permute(1,0) + return x + +# image = np.random.randint(0,255,size=(1,3,32,32)).astype(np.uint8) +# input_shape=image.squeeze().shape +# conv2d = Conv2D(16, (3,3), input_shape=input_shape, padding='same', stride=1) +# conv2d.initialize(None) +# output=conv2d.forward_pass(image,training=True) +# print(output.shape) \ No newline at end of file diff --git a/K_means.py b/K_means.py new file mode 100644 index 0000000..2a754db --- /dev/null +++ b/K_means.py @@ -0,0 +1,153 @@ +# 聚类算法 + +import random +import pandas as pd +import numpy as np +import copy +import math + + +# 
计算距离 +def Dis(dataSet, centroids, k): + # 处理质心 + # 如果之前分类的个数不够k类 + if len(centroids) < k: + centroids = np.append(centroids, random.sample(list(dataSet), k-len(centroids)), axis=0) + + # 处理节点 + clalist=[] + for data in dataSet: + #(np.tile(a,(2,1))就是把a先沿x轴复制1倍,即没有复制,仍然是 [0,1,2]。 再把结果沿y方向复制2倍得到array([[0,1,2],[0,1,2]])) + diff = np.tile(data, (k, 1)) + mul_Diff = np.multiply(diff, centroids) + mul_Dist = np.sum(mul_Diff, axis=1) #和 (axis=1表示行) + clalist.append(mul_Dist) + clalist = np.array(clalist) #返回一个每个点到质点的距离len(dateSet)*k的数组 + return clalist + + +# 计算质心 +def classify(dataSet, centroids, k): + # 计算样本到质心的距离 + clalist = Dis(dataSet, centroids, k) + # 分组并计算新的质心 + minDistIndices = np.argmax(clalist, axis=1) #axis=1 表示求出每行的最小值的下标 + newCentroids = pd.DataFrame(dataSet).groupby(minDistIndices).mean() #DataFramte(dataSet)对DataSet分组,groupby(min)按照min进行统计分类,mean()对分类结果求均值 + newCentroids = newCentroids.values + + # 对新质心,也分配成1-value_sum的形式,否则会出现小数 + for centro in newCentroids: + # centro是一个一维向量 + sorted_data=np.argsort(centro) # 排序信息 + value = 1 + for valueIndex in sorted_data: + centro[valueIndex] = value + value += 1 + + # 计算变化量 + # 有可能新分类个数不够k + if len(newCentroids) != len(centroids): + changed = 1 # 肯定有变化 + else: + changed = newCentroids - centroids # 有可能没变化 + + return changed, newCentroids + + +#确定初始中心点 +def euler_distance(point1: list, point2: list) -> float: + """ + 计算两点之间的欧拉距离,支持多维 + distance = 0.0 + for a, b in zip(point1, point2): + distance += math.pow(a - b, 2) + return math.sqrt(distance) + """ + distance = 0.0 + for a, b in zip(point1, point2): + distance += a*b + return distance + + +def get_closest_dist(point, centroids): + min_dist = math.inf # 初始设为无穷大 + for i, centroid in enumerate(centroids): + dist = euler_distance(centroid, point) + if dist < min_dist: + min_dist = dist + return min_dist + + +def kpp_centers(data_set: list, k: int) -> list: + """ + 从数据集中返回 k 个对象可作为质心 + """ + cluster_centers = [] + cluster_centers.append(random.choice(data_set)) + d = [0 for _ in range(len(data_set))] + for _ in range(1, k): + total = 0.0 + for i, point in enumerate(data_set): + d[i] = get_closest_dist(point, cluster_centers) # 与最近一个聚类中心的距离 + total += d[i] + total *= random.random() + for i, di in enumerate(d): # 轮盘法选出下一个聚类中心; + total -= di + if total > 0: + continue + cluster_centers.append(data_set[i]) + break + return cluster_centers + + +# 使用k-means分类 +def kmeans(dataSet, k): + # 将dataSet预处理成为算距离需要使用的重要程度矩阵 + valueSet = np.zeros(dataSet.shape, dtype=int) # 初始矩阵 + for index in range(len(dataSet)): + data = dataSet[index] + value = valueSet[index] + sorted_data=list(map(abs,data)) # 绝对值 + sorted_data=np.argsort(sorted_data) # 排序信息 + i = 1 # 对于越小的值,分配的i越小 + for valueIndex in sorted_data: + value[valueIndex] = i + i += 1 + + # 随机取质心 + # centroids = random.sample(dataSet, k) + centroids=kpp_centers(valueSet, k) + + # 更新质心 直到变化量全为0 + i=100 + changed, newCentroids = classify(valueSet, centroids, k) + # while(i): #while np.any(changed != 0) + while np.any(changed != 0) and i > 0: + changed, newCentroids = classify(valueSet, newCentroids, k) + i=i-1 + print("第{}次迭代".format(100-i)) + + centroids = sorted(newCentroids.tolist()) #tolist()将矩阵转换成列表 sorted()排序 + + clalist = Dis(valueSet, centroids, k) + minDistIndices = np.argmax(clalist, axis=1) + return minDistIndices + + +def getCluster(input, clusters_num): + # 对卷积层聚类为4维,对全连接层聚类为2维 + if len(input.shape) == 2: # 如果是全连接层 + fcValues = input.detach().cpu().numpy() # 转成numpy + # input.shape[1]是聚类基本单位的数据个数 + clusterIndex = kmeans(fcValues, clusters_num) # 分类 
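+    # For conv layers, each kernel_size x kernel_size kernel is flattened into a
+    # row vector, the rows are clustered with kmeans, and the cluster indices are
+    # reshaped back to (out_channels, in_channels) so every kernel gets a label.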
+ elif len(input.shape) == 4: # 卷积层 + kernel_size = input.shape[3] # 卷积核尺寸 + preShape = input.shape[:2] # 四维数据的前两维 + inputCut = input.view(preShape[0]*preShape[1], kernel_size*kernel_size) # 降维后的数据,四维到二维 + convValues = inputCut.detach().cpu().numpy() # 转成numpy + clusterIndex = kmeans(convValues, clusters_num) # 分类 + clusterIndex.resize(preShape) + else: + clusterIndex = None + + return clusterIndex \ No newline at end of file diff --git a/Op.py b/Op.py new file mode 100644 index 0000000..ddbd7a0 --- /dev/null +++ b/Op.py @@ -0,0 +1,47 @@ +from ActivationPrune import activationPruneModelOp +from WeightPrune import weightPruneModelOp +import os +def makeDir(model_name,ratio,patternA): + if not os.path.exists('./pth/' + model_name + '/ratio=' + str(ratio)): # + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Activation') + if patternA != 'train': + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight') + os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight') + +def Op(operation,model_name,batch_size,img_size,ratio,epochA,epochAW,weightParameter,LinearParameter): + if operation == 'trainInitialModel': # 训练初始模型 + patternA = 'train' + ratio = 0 + makeDir(model_name,ratio,patternA) + activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA) + + if operation == 'onlyActivationPruneWithRetrain': # 只进行输入特征图的剪枝,不进行权重的聚类剪枝 + patternA = 'retrain' + makeDir(model_name,ratio,patternA) + activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA) + + if operation == 'onlyWeightPruneWithRetrain': + patternA = 'test' + patternW = 'train' + ratio = 0 + makeDir(model_name,ratio,patternA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter,LinearParameter) + + if operation == 'activationWeightPruneWithRetrain': + patternA = 'retrain' + patternW = 'retrain' + makeDir(model_name, ratio, patternA) + activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) + + if operation == 'onlyActivationPruneTest': + patternA = 'test' + makeDir(model_name, ratio, patternA) + activationPruneModelOp(model_name, batch_size, img_size, patternA, ratio, epochA) + + if operation == 'activationWeightPruneTest': + patternA = 'test' + patternW = 'test' + makeDir(model_name, ratio, patternA) + weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW, epochAW, weightParameter, LinearParameter) + diff --git a/WeightPrune.py b/WeightPrune.py new file mode 100644 index 0000000..88abc06 --- /dev/null +++ b/WeightPrune.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +import torch +import torchvision.transforms as transforms +import torch.optim as optim +from torch.utils.data import DataLoader +import torch.nn.utils.prune as prune +import pandas as pd +import numpy as np +from K_means import getCluster +import torch.nn as nn +from model import * +from train import * +from ActivationPrune import Conv2dTest,LinearTest +from torch.nn.parameter import Parameter + +def scp_upgrade(kernel,old_scp): + old_scp+=np.abs(kernel.cpu().detach().numpy()) + return old_scp + +def scp_binaeryzation(scps,C): + if len(scps.shape)==3: + for r in np.arange(0,scps.shape[0]): + series=pd.Series(scps[r].ravel()) + rank_info=series.rank() + for i in np.arange(0,scps[r].shape[0]): + for j in np.arange(0,scps[r].shape[1]): + index=i*scps[r].shape[0]+j + if(rank_info[index]<=C): + 
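+                        # Positions whose rank falls among the C smallest
+                        # accumulated magnitudes of this cluster's SCP are masked
+                        # to 0; the rest become 1, forming the shared pattern.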
scps[r][i][j]=0 + else: + scps[r][i][j]=1 + + elif len(scps.shape)==2: + for r in np.arange(0,scps.shape[0]): + series=pd.Series(scps[r].ravel()) + rank_info=series.rank() + for i in np.arange(0,scps[r].shape[0]): + index=i + if(rank_info[index]<=C): + scps[r][i]=0 + else: + scps[r][i]=1 + +class PatternPruningMethod(prune.BasePruningMethod): + PRUNING_TYPE= "unstructured" + + def __init__(self, custers_num, cut_num, pruning_type): + self.clusters_num=custers_num + self.cut_num=cut_num + self.pruning_type=pruning_type + prune.BasePruningMethod.__init__(self) + + def compute_mask(self, t, default_mask): + mask=default_mask.clone()#复制一个mask大小等于当前层的filter + if self.pruning_type=='conv': + scps=np.zeros(self.clusters_num*default_mask.shape[-1]*default_mask.shape[-1])#复制num个scp,表示每一个卷积族的pattern + scps.resize(self.clusters_num,default_mask.shape[-1],default_mask.shape[-1]) + + clusters=getCluster(t,self.clusters_num)#输入当前层的filter,获得其聚类信息 + + print(clusters) + + for i in np.arange(0,clusters.shape[0]):#遍历所有kernel,计算所有cluster的scp + for j in np.arange(0,clusters.shape[1]): + scp_upgrade(t[i][j],scps[clusters[i][j]]) + + scp_binaeryzation(scps,self.cut_num)#根据scp二值化获得真正的pattern + print(scps) + + for i in np.arange(0,clusters.shape[0]):#根据scp和每个kernel的族编号得到最终的mask + for j in np.arange(0,clusters.shape[1]): + mask[i][j]=torch.from_numpy(scps[clusters[i][j]]) + + elif self.pruning_type=='full': + + scps=np.zeros(self.clusters_num*default_mask.shape[-1]) + scps.resize(self.clusters_num,default_mask.shape[-1]) + + clusters=getCluster(t,self.clusters_num) + + print(clusters) + + for i in np.arange(0,clusters.shape[0]): + scp_upgrade(t[i],scps[int(clusters[i])]) + + scp_binaeryzation(scps,self.cut_num)#根据scp二值化获得真正的pattern + print(scps) + + for i in np.arange(0,clusters.shape[0]):#根据scp和每个kernel的族编号得到最终的mask + mask[i]=torch.from_numpy(scps[int(clusters[i])]) + + + return mask + +def weightPrune(model_name,ratio,weightPrameter,LinearPrameter,inplace=False): + def activationWeightPruneOp(module): + for name, child in module.named_children(): + if isinstance(child, nn.Conv2d): + print(child) + print(child.weight.shape) + print('custers_num=6', 'cut_num=', child.weight.shape[-1] * child.weight.shape[-2] / weightPrameter, + 'pruning_type=conv') + convPruning = PatternPruningMethod(custers_num=6, + cut_num=child.weight.shape[-1] * child.weight.shape[-2] / weightPrameter, + pruning_type='conv') + convPruning.apply(child, 'weight', 6, child.weight.shape[-1] * child.weight.shape[-2] / weightPrameter, 'conv') + + # 针对输入特征图添加剪枝操作 + activationWeightPruneConv = Conv2dTest( + ratio, + child.in_channels, + child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding, + dilation=child.dilation, groups=child.groups, bias=(child.bias is not None), + padding_mode=child.padding_mode + ) + if child.bias is not None: + activationWeightPruneConv.bias = child.bias + activationWeightPruneConv.weight = Parameter(child.weight) + module._modules[name] = activationWeightPruneConv + child._forward_pre_hooks + + elif isinstance(child, nn.Linear): + print(child) + print(child.weight.shape) + print('custers_num=4', 'cut_num=', child.weight.shape[-1] / LinearPrameter, 'pruning_type=full') + fullPruning = PatternPruningMethod(custers_num=8, cut_num=child.weight.shape[-1] / LinearPrameter, + pruning_type='full') + fullPruning.apply(child, 'weight', 8, child.weight.shape[-1] / LinearPrameter, 'full') + child._forward_pre_hooks + else: + activationWeightPruneOp(child) # 这是用来迭代的,Maxpool层的功能是不变的 + if not inplace: + model = 
copy.deepcopy(model_name) + activationWeightPruneOp( model_name) # 为每一层添加量化操作 + return model + +def getModel(modelName): + if modelName == 'LeNet': + return getLeNet() # 加载原始模型框架 + elif modelName == 'AlexNet': + return getAlexnet() + elif modelName == 'VGG16': + return get_vgg16() + elif modelName == 'SqueezeNet': + return get_squeezenet() + elif modelName == 'ResNet': + return get_resnet18() + +def getDataSet(modelName,batchSize,imgSize): + if modelName == 'VGG16' or modelName == 'AlexNet' or modelName == 'ResNet' or modelName == 'SqueezeNet': + dataloaders, dataset_sizes = load_cifar10(batch_size=batchSize, pth_path='./data', + img_size=imgSize) # 确定数据集 + elif modelName == 'LeNet': + dataloaders, dataset_sizes = load_mnist(batch_size=batchSize, path='./data', img_size=imgSize) + + return dataloaders,dataset_sizes + +def weightPruneModelOp(model_name,batch_size,img_size,ratio,pattern,epoch,weightParameter,LinearParameter): + net = getModel(model_name) + dataloaders, dataset_sizes = getDataSet(model_name,batch_size,img_size) + criterion = nn.CrossEntropyLoss() + if pattern == 'retrain' or pattern == 'train': + if pattern == 'retrain': + getPth = './pth/' + model_name + '/ratio=' +str(ratio)+ '/Activation' + '/best.pth' #读取经过输入特征图剪枝训练后的权重模型 + else: + getPth = './pth/' + model_name + '/ratio=0' + '/Activation' + '/best.pth' + optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8) # 设置学习率下降策略 + net.load_state_dict(torch.load(getPth)) + weightPrune(net, ratio ,weightParameter,LinearParameter) + train_model_jiang(net,dataloaders, dataset_sizes,ratio,'weight', pattern,criterion=criterion, optimizer=optimizer, name=model_name, + scheduler=scheduler, num_epochs=epoch, rerun=False) + + if pattern == 'test': + getPth = './pth/' + model_name+ '/ratio=' +str(ratio)+ '/ActivationWeight/' + 'best.pth' + weightPrune(net, ratio,weightParameter,LinearParameter) + net.load_state_dict(torch.load(getPth)) + test_model(net, dataloaders, dataset_sizes, criterion=criterion) + + + + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..e8cfdc7 --- /dev/null +++ b/main.py @@ -0,0 +1,48 @@ +from ActivationPrune import * +from WeightPrune import weightPruneModelOp +import os +from Op import Op + +if __name__ == '__main__': + model_name = 'AlexNet' # 确定模型名称 + batch_size = 1 # 确定批训练图片数目 + img_size = 227 # 确定单张图片大小 + ratio = 0.1 # 确定输入特征图剪枝比率 + epochA = 30 # 确定针对输入特征图剪枝重训练轮数或原始模型(不掺杂任何剪枝训练)轮数 + epochAW = 40 # 确定针对卷积核聚类剪枝重训练轮数 + weightParameter = (4/1) + LinearParameter = 4 + ''' + 一共设置有六种针对模型的操作 + 1. operation = 'trainInitialModel',意为训练初始模型,此时不参杂任何剪枝操作,单纯训练初始模型 + 2. operation = 'onlyActivationPruneWithRetrain',意为只针对输入特征图进行剪枝,并进行重训练 + 3. operation = 'onlyWeightPruneWithRetrain',意为只针对权重值进行聚类剪枝,并进行重训练 + 4. operation = 'activationWeightPruneWithRetrain',意为对输入特征图剪枝并进行重训练,对其生成的模型权重进行聚类剪枝并进行重训练 + 5. operation = 'onlyActivationPruneTest',意为只针对输入特征图剪枝后的模型进行inferernce,测试模型精度 + 6. 
operation = 'activationWeightPruneTest',意为针对输入特征图与权重聚类剪枝后的模型进行inference,测试模型精度 + ''' + operation = 'trainInitialModel' + Op(operation,model_name,batch_size,img_size,ratio,epochA,epochAW,weightParameter,LinearParameter) + + + + + + + + + + + + + + + # if not os.path.exists('./pth/'+model_name+'/ratio='+str(ratio)): # + # os.makedirs('./pth/'+model_name+'/ratio='+str(ratio)+'/Activation') + # if patternA != 'train': + # os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/ActivationWeight') + # os.makedirs('./pth/' + model_name + '/ratio=' + str(ratio) + '/Weight') + # + # # activationPruneModelOp(model_name, batch_size, img_size,patternA,ratio,epochA) + # if patternA != 'train' and not(patternA == 'test' and ratio == 0): + # weightPruneModelOp(model_name, batch_size, img_size, ratio, patternW,epochAW,weightParameter,LinearParameter) \ No newline at end of file diff --git a/model.py b/model.py new file mode 100644 index 0000000..7a56d8f --- /dev/null +++ b/model.py @@ -0,0 +1,632 @@ +from collections import OrderedDict +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +import torch.nn.functional as F +import math +import torch + +class AlexNet(nn.Module): + + def __init__(self, num_classes=10): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(96, 256, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(256, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + # nn.Softmax() + ) + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + return x +class LeNet(nn.Module): + def __init__(self, num_classes=10): + super(LeNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(1, 6, kernel_size=5), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(6, 16, kernel_size=5), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(16, 120, kernel_size=5), + nn.ReLU(inplace=True) + ) + self.classifier = nn.Sequential( + nn.Linear(120, 84), + nn.ReLU(inplace=True), + nn.Linear(84, num_classes) + ) + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.classifier(x) + return x + +class VGG(nn.Module): + + def __init__(self, features, num_classes=10): + super(VGG, self).__init__() + self.features = features + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + self._initialize_weights() + + def forward(self, x): + if hasattr(self, "first_input_prune"): + x = self.first_input_prune(x) + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if 
isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + +class ResNet(nn.Module): + def __init__(self, block, layers, num_classes=10): + self.inplanes = 64 + super(ResNet, self).__init__() + + m = OrderedDict() + m['conv1'] = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + m['bn1'] = nn.BatchNorm2d(64) + m['relu1'] = nn.ReLU(inplace=True) + m['maxpool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.group1 = nn.Sequential(m) + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + self.avgpool = nn.Sequential(nn.AvgPool2d(7)) + + self.group2 = nn.Sequential( + OrderedDict([ + ('fc', nn.Linear(512 * block.expansion, num_classes)) + ]) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.group1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.group2(x) + + return x + + +class ResNetBasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(ResNetBasicBlock, self).__init__() + m = OrderedDict() + m['conv1'] = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + m['relu1'] = nn.ReLU(inplace=True) + m['conv2'] = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.group1 = nn.Sequential(m) + self.relu = nn.Sequential(nn.ReLU(inplace=True)) + self.downsample = downsample + + def forward(self, x): + if self.downsample is not None: + residual = self.downsample(x) + else: + residual = x + out = self.group1(x) + residual + out = self.relu(out) + return out + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, + expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + + self.group1 = nn.Sequential( + OrderedDict([ + ('squeeze', nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)), + ('squeeze_activation', nn.ReLU(inplace=True)) + ]) + ) + + self.group2 = nn.Sequential( + OrderedDict([ + ('expand1x1', nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)), + ('expand1x1_activation', nn.ReLU(inplace=True)) + ]) + ) + + self.group3 = nn.Sequential( + OrderedDict([ + ('expand3x3', nn.Conv2d(squeeze_planes, 
expand3x3_planes, kernel_size=3, padding=1)), + ('expand3x3_activation', nn.ReLU(inplace=True)) + ]) + ) + + def forward(self, x): + x = self.group1(x) + return torch.cat([self.group2(x), self.group3(x)], 1) + + +class SqueezeNet(nn.Module): + + def __init__(self, num_classes=1000): + super(SqueezeNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=7, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(512, 64, 256, 256), + ) + # Final convolution is initialized differently form the rest + final_conv = nn.Conv2d(512, num_classes, kernel_size=1) + self.classifier = nn.Sequential( + nn.Dropout(p=0.5), + final_conv, + nn.ReLU(inplace=True), + nn.AvgPool2d(13) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + gain = 2.0 + if m is final_conv: + m.weight.data.normal_(0, 0.01) + else: + fan_in = m.kernel_size[0] * m.kernel_size[1] * m.in_channels + u = math.sqrt(3.0 * gain / fan_in) + m.weight.data.uniform_(-u, u) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.features(x) + x = self.classifier(x) + return x.view(x.size(0), self.num_classes) + + +class Inception3(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=False, transform_input=False): + super(Inception3, self).__init__() + self.aux_logits = aux_logits + self.transform_input = transform_input + self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3,stride=2) + self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) + self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) + self.Mixed_5b = InceptionA(192, pool_features=32) + self.Mixed_5c = InceptionA(256, pool_features=64) + self.Mixed_5d = InceptionA(288, pool_features=64) + self.Mixed_6a = InceptionB(288) + self.Mixed_6b = InceptionC(768, channels_7x7=128) + self.Mixed_6c = InceptionC(768, channels_7x7=160) + self.Mixed_6d = InceptionC(768, channels_7x7=160) + self.Mixed_6e = InceptionC(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = InceptionAux(768, num_classes) + self.Mixed_7a = InceptionD(768) + self.Mixed_7b = InceptionE(1280) + self.Mixed_7c = InceptionE(2048) + self.group1 = nn.Sequential( + OrderedDict([ + ('fc', nn.Linear(2048, num_classes)) + ]) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.Tensor(X.rvs(m.weight.data.numel())) + m.weight.data.copy_(values.reshape(m.weight.shape)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def forward(self, x): + if self.transform_input: + x = x.clone() + x[0] = x[0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x[1] = x[1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x[2] = x[2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + # 299 x 299 x 3 + x = self.Conv2d_1a_3x3(x) + # 149 x 149 x 32 + x = self.Conv2d_2a_3x3(x) + # 147 x 147 x 32 + x = self.Conv2d_2b_3x3(x) + # 147 x 147 x 64 + x = F.max_pool2d(x, kernel_size=3, stride=2) 
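+        # The spatial sizes in the comments below assume the canonical
+        # 299 x 299 Inception-v3 input resolution.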
+ # 73 x 73 x 64 + x = self.Conv2d_3b_1x1(x) + # 73 x 73 x 80 + x = self.Conv2d_4a_3x3(x) + # 71 x 71 x 192 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # 35 x 35 x 192 + x = self.Mixed_5b(x) + # 35 x 35 x 256 + x = self.Mixed_5c(x) + # 35 x 35 x 288 + x = self.Mixed_5d(x) + # 35 x 35 x 288 + x = self.Mixed_6a(x) + # 17 x 17 x 768 + x = self.Mixed_6b(x) + # 17 x 17 x 768 + x = self.Mixed_6c(x) + # 17 x 17 x 768 + x = self.Mixed_6d(x) + # 17 x 17 x 768 + x = self.Mixed_6e(x) + # 17 x 17 x 768 + if self.training and self.aux_logits: + aux = self.AuxLogits(x) + # 17 x 17 x 768 + x = self.Mixed_7a(x) + # 8 x 8 x 1280 + x = self.Mixed_7b(x) + # 8 x 8 x 2048 + x = self.Mixed_7c(x) + # 8 x 8 x 2048 + x = F.avg_pool2d(x, kernel_size=8) + # 1 x 1 x 2048 + x = F.dropout(x, training=self.training) + # 1 x 1 x 2048 + x = x.view(x.size(0), -1) + # 2048 + x = self.group1(x) + # 1000 (num_classes) + if self.training and self.aux_logits: + return x, aux + return x + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features): + super(InceptionA, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = BasicConv2d(in_channels, 48, kernel_size=1) + self.branch5x5_2 = BasicConv2d(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, padding=1) + + self.branch_pool = BasicConv2d(in_channels, pool_features, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels): + super(InceptionB, self).__init__() + self.branch3x3 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = BasicConv2d(96, 96, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7): + super(InceptionC, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7_2 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = BasicConv2d(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = BasicConv2d(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + 
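+        # The double-7x7 branch factorizes a 7x7 convolution into alternating
+        # (7,1) and (1,7) convolutions, covering the same receptive field with
+        # fewer parameters.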
self.branch7x7dbl_5 = BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels): + super(InceptionD, self).__init__() + self.branch3x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch3x3_2 = BasicConv2d(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = BasicConv2d(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = BasicConv2d(192, 192, kernel_size=3, stride=2) + + def forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels): + super(InceptionE, self).__init__() + self.branch1x1 = BasicConv2d(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = BasicConv2d(in_channels, 384, kernel_size=1) + self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = BasicConv2d(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = BasicConv2d(in_channels, 192, kernel_size=1) + + def forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes): + super(InceptionAux, self).__init__() + self.conv0 = BasicConv2d(in_channels, 128, kernel_size=1) + self.conv1 = BasicConv2d(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + + fc = nn.Linear(768, num_classes) + fc.stddev = 0.001 + + self.group1 = nn.Sequential( + 
OrderedDict([ + ('fc', fc) + ]) + ) + + def forward(self, x): + # 17 x 17 x 768 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # 5 x 5 x 768 + x = self.conv0(x) + # 5 x 5 x 128 + x = self.conv1(x) + # 1 x 1 x 768 + x = x.view(x.size(0), -1) + # 768 + x = self.group1(x) + # 1000 + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.group1 = nn.Sequential( + OrderedDict([ + ('conv', nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)) + # ,('bn', nn.BatchNorm2d(out_channels, eps=0.001)) + ]) + ) + + def forward(self, x): + x = self.group1(x) + return F.relu(x, inplace=True) + +def vgg_make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + + +def getLeNet(num_classes=10): + model = LeNet(num_classes) + return model + +def getAlexnet(num_classes=10): + model = AlexNet(num_classes) + return model + +def get_vgg16(num_classes=10): + vgg16_setting = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'] + model = VGG(vgg_make_layers(vgg16_setting), num_classes) + return model + + +def get_resnet18(num_classes=10): + model = ResNet(ResNetBasicBlock, [2, 2, 2, 2], num_classes) + return model + + +def get_squeezenet(num_classes=10): + model = SqueezeNet(num_classes) + return model + + +def get_inception_v3(num_classes=10): + model = Inception3(num_classes) + return model + + diff --git a/train.py b/train.py new file mode 100644 index 0000000..3b726f0 --- /dev/null +++ b/train.py @@ -0,0 +1,289 @@ +from __future__ import print_function, division +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim import lr_scheduler +import numpy as np +import torchvision +from torchvision import datasets, models, transforms +import matplotlib.pyplot as plt +import time +import os +import copy +from tqdm import tqdm +from collections import OrderedDict + +def download_mnist(save_path): + torchvision.datasets.MNIST(root=save_path,train=True,download=True) + torchvision.datasets.MNIST(root=save_path,train=False,download=True) + return save_path + +def load_mnist(batch_size=64,path='',img_size=32): + if img_size != 32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()] + ) + else: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + trainset = torchvision.datasets.MNIST(root=path,train=True,download=False,transform=transform) + trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) + testset = torchvision.datasets.MNIST(root=path,train=False,download=False,transform=test_transform) + testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_sizes = {"train":60000,"val":10000} + return dataloaders,dataset_sizes + +def 
download_cifar10(save_path): + torchvision.datasets.CIFAR10(root=save_path,train=True,download=True) + torchvision.datasets.CIFAR10(root=save_path,train=False,download=True) + return save_path + +def load_cifar10(batch_size=64,pth_path='./data',img_size=32): + if img_size!=32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.Resize((img_size,img_size)) + ,transforms.ToTensor()]) + else: + transform = transforms.Compose([transforms.Pad(padding = 4), + transforms.RandomCrop(32), + transforms.RandomHorizontalFlip(),transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.ToTensor()]) + trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True,download=False, transform=transform) + trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,shuffle=True, num_workers=2) + testset = torchvision.datasets.CIFAR10(root=pth_path, train=False,download=False, transform=test_transform) + testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,shuffle=False, num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_sizes = {"train":50000,"val":10000} + return dataloaders,dataset_sizes + +def download_cifar100(save_path): + torchvision.datasets.CIFAR100(root=save_path,train=True,download=True) + torchvision.datasets.CIFAR100(root=save_path,train=False,download=False) + return save_path + +def load_cifar100(batch_size,pth_path,img_size): + if img_size!=32: + transform = transforms.Compose( + [transforms.Resize((img_size,img_size)), + transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.Resize((img_size,img_size)) + ,transforms.ToTensor()]) + else: + transform = transforms.Compose([transforms.Pad(padding = 4), + transforms.RandomCrop(32), + transforms.RandomHorizontalFlip(),transforms.ToTensor()]) + test_transform = transforms.Compose([transforms.ToTensor()]) + trainset = torchvision.datasets.CIFAR100(root=pth_path,train=True,download=False,transform=transform) + trainloader = torch.utils.data.DataLoader(trainset,batch_size=batch_size,shuffle=True,num_workers=2) + testset = torchvision.datasets.CIFAR100(root=pth_path,train=False,download=False,transform=test_transform) + testloader = torch.utils.data.DataLoader(testset,batch_size=batch_size,shuffle=True,num_workers=2) + dataloaders = {"train":trainloader,"val":testloader} + dataset_size ={"train":50000,"val":10000} + return dataloaders,dataset_size +def test_model(model,dataloaders,dataset_sizes,criterion): + print("validation model:") + phase = "val" + model.cuda() + model.eval() + with torch.no_grad(): + running_loss = 0.0 + running_acc = 0.0 + for inputs,labels in tqdm(dataloaders[phase]): + inputs,labels = inputs.cuda(),labels.cuda() + outputs = model(inputs) + _,preds = torch.max(outputs,1) + loss = criterion(outputs,labels) + running_loss += loss.item() * inputs.size(0) + running_acc += torch.sum(preds == labels.data) + epoch_loss = running_loss/dataset_sizes[phase] + epoch_acc = running_acc / dataset_sizes[phase] + epoch_acc = epoch_acc.item() + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + return epoch_acc,epoch_loss + +def WriteData(savePath, msg): + + full_path = savePath + '/Accuracy.txt' # 也可以创建一个.doc的word文档 + file = open(full_path, 'a') + file.write(msg) #msg也就是下面的Hello world! 
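+    # Note: the handle is never closed here; wrapping the write in
+    # `with open(full_path, 'a') as file:` would release it automatically.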
+ # file.close() + +def train_model_jiang(model, dataloaders, dataset_sizes,ratio, type,pattern,criterion, optimizer, name,scheduler=None, num_epochs=100,rerun=False): + if rerun == True: + print('我进来了') + print(num_epochs) + since = time.time() + model.load_state_dict(torch.load('./test_20.pth')) + best_model_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + + model.cuda() + for epoch in range(20, num_epochs): + print('Epoch {}/{}'.format(epoch + 1, num_epochs)) + print('-' * 10) + print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr'])) + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + model.train() # Set model to training mode + else: + print('val stage') + model.eval() # Set model to evaluate mode + + running_loss = 0.0 + running_corrects = 0 + + # Iterate over data. + i = 0 + loss_a = 0 + p = 0 + for data in dataloaders[phase]: + inputs, labels = data + inputs = inputs.cuda() + labels = labels.cuda() + + # zero the parameter gradients + optimizer.zero_grad() + + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + loss_a = loss.item() + print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss_a)) + loss_a = 0 + i += 1 + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + if phase == 'train' and scheduler is not None: + scheduler.step() + + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + # epoch_loss = running_loss / p + # epoch_acc = running_corrects.double() / p + + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + best_model_wts = copy.deepcopy(model.state_dict()) + model.load_state_dict(best_model_wts) + path = './test_{}.pth'.format(epoch+1) + torch.save(model.state_dict(), path) + + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:4f}'.format(best_acc)) + + # load best model weights + model.load_state_dict(best_model_wts) + path = './best.pth'.format(epoch + 1) + torch.save(model.state_dict(), path) + + if rerun == False: + since = time.time() + best_model_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + if type == 'activation': + savePth = './pth/'+name+'/ratio='+str(ratio)+'/Activation' + else: + if pattern == 'retrain': + savePth = './pth/'+name+'/ratio='+str(ratio)+'/ActivationWeight' + elif pattern == 'train': + savePth = './pth/' + name + '/ratio=' + str(ratio) + '/Weight' + model.cuda() + WriteData(savePth,'ratio='+str(ratio)+'\n') + for epoch in range(num_epochs): + print('Epoch {}/{}'.format(epoch+1, num_epochs)) + print('-' * 10) + print('the %d lr:%f'%(epoch+1,optimizer.param_groups[0]['lr'])) + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + model.train() # Set model to training mode + else: + print('val stage') + model.eval() # Set model to evaluate mode + running_loss = 0.0 + running_corrects = 0 + # Iterate over data. 
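+            # Gradients are tracked only in the 'train' phase; validation
+            # batches run with autograd disabled via
+            # torch.set_grad_enabled(phase == 'train') below.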
+ i = 0 + # loss_a = 0 + # p = 0 + for data in dataloaders[phase]: + inputs,labels = data + inputs = inputs.cuda() + labels = labels.cuda() + # zero the parameter gradients + optimizer.zero_grad() + # forward + # track history if only in train + with torch.set_grad_enabled(phase == 'train'): + outputs = model(inputs) + _, preds = torch.max(outputs, 1) + loss = criterion(outputs, labels) + loss_a = loss.item() + print('[%d ,%5d] loss:%.3f'%(epoch+1,i+1,loss_a)) + # loss_a = 0 + i += 1 + # backward + optimize only if in training phase + if phase == 'train': + loss.backward() + optimizer.step() + # statistics + running_loss += loss.item() * inputs.size(0) + running_corrects += torch.sum(preds == labels.data) + if phase == 'train' and scheduler is not None: + scheduler.step() + epoch_loss = running_loss / dataset_sizes[phase] + epoch_acc = running_corrects.double() / dataset_sizes[phase] + # epoch_loss = running_loss / p + # epoch_acc = running_corrects.double() / p + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + # deep copy the model + if phase == 'val' and epoch_acc > best_acc: + best_acc = epoch_acc + best_model_wts = copy.deepcopy(model.state_dict()) + model.load_state_dict(best_model_wts) + path = savePth+'/test_{}.pth'.format(epoch + 1) + torch.save(model.state_dict(), path) + WriteData(savePth, str((round(float(epoch_acc),4))*100) + '%-' +'epoch=' +str(epoch)+'\n') + + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:4f}'.format(best_acc)) + + # load best model weights + model.load_state_dict(best_model_wts) + path = savePth + '/best.pth' + torch.save(model.state_dict(), path) + return model
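For reference, a minimal sanity check for the im2col convolution added in `Conv2dNew.py`. This is a sketch, not part of the diff: the tensor shapes are illustrative, and it relies only on `Execution.conv2d` as defined above. With `ratio=0` the pruning branch (`activationSlidePrune`) is skipped entirely, so the result should match `torch.nn.functional.conv2d` up to floating-point noise, and the check can run on CPU (the hard-coded `cuda:0` tensor is only allocated inside the pruning branch).

```python
import torch
import torch.nn.functional as F
from Conv2dNew import Execution

x = torch.randn(2, 3, 8, 8)   # (batch, channels, height, width)
w = torch.randn(4, 3, 3, 3)   # (out_channels, in_channels, kh, kw)
b = torch.randn(4)

ref = F.conv2d(x, w, b, stride=1, padding=1)
# Execution.conv2d indexes stride[0] and forwards the padding pair to
# determine_padding, so both are passed as tuples, mirroring Conv2dTest.forward.
out = Execution(ratio=0).conv2d(x, w, b, stride=(1, 1), padding=(1, 1))
print(torch.allclose(ref, out, atol=1e-5))   # expected: True
```

With `ratio > 0` the two outputs diverge by design: any im2col column whose count of nonzero entries is at most `ceil(ratio * X_col.shape[0])` is zeroed before the matrix multiply.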