diff --git a/ActivationPrune.py b/ActivationPrune.py
new file mode 100644
index 0000000..a3a47b2
--- /dev/null
+++ b/ActivationPrune.py
@@ -0,0 +1,123 @@
+import copy
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+import time
+from model import *
+from train import *
+import random
+# from .model import ResNetBasicBlock
+
+from math import sqrt
+from Conv2dNew import Execution
+
+class Conv2dTest(nn.Conv2d):
+    def __init__(self,
+                 ratio,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 padding_mode='zeros',
+                 ):
+        super(Conv2dTest, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups,
+                                         bias, padding_mode)
+        self.ratio = ratio
+
+    def forward(self, input):
+        # Replace F.conv2d with the im2col-based convolution that prunes
+        # activation columns; the custom kernel assumes dilation=1 and groups=1.
+        E = Execution(self.ratio)
+        output = E.conv2d(input, self.weight, self.bias, self.stride, self.padding)
+        return output
+
+class LinearTest(nn.Linear):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 bias=True,
+                 ):
+        super(LinearTest, self).__init__(in_features, out_features, bias)
+
+    def forward(self, input):
+        output = F.linear(input, self.weight, self.bias)
+        return output
+
+def prepare(model, ratio, inplace=False):
+    def addActivationPruneOp(module):
+        nonlocal layer_cnt
+        for name, child in module.named_children():
+            if isinstance(child, nn.Conv2d):
+                p_name = str(layer_cnt)
+                activationPruneConv = Conv2dTest(
+                    ratio,
+                    child.in_channels,
+                    child.out_channels, child.kernel_size, stride=child.stride, padding=child.padding,
+                    dilation=child.dilation, groups=child.groups, bias=(child.bias is not None),
+                    padding_mode=child.padding_mode
+                )
+                if child.bias is not None:
+                    activationPruneConv.bias = child.bias
+                activationPruneConv.weight = child.weight
+                module._modules[name] = activationPruneConv
+                layer_cnt += 1
+            elif isinstance(child, nn.Linear):
+                p_name = str(layer_cnt)
+                activationPruneLinear = LinearTest(
+                    child.in_features, child.out_features,
+                    bias=(child.bias is not None)
+                )
+                if child.bias is not None:
+                    activationPruneLinear.bias = child.bias
+                activationPruneLinear.weight = child.weight
+                module._modules[name] = activationPruneLinear
+                layer_cnt += 1
+            else:
+                addActivationPruneOp(child)  # recurse into submodules; layers such as MaxPool are left unchanged
+    layer_cnt = 0
+    if not inplace:
+        model = copy.deepcopy(model)
+    addActivationPruneOp(model)  # attach the activation-pruning op to every Conv2d/Linear layer
+    return model
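+
+# A minimal usage sketch (illustrative only): `prepare` returns a deep copy of the
+# model in which every Conv2d runs the pruned im2col convolution. The weight path
+# and ratio mirror main.py; the random test tensor is an assumption for the demo.
+#
+#   model = getLeNet()
+#   model.load_state_dict(torch.load('./pth/LeNet/LeNet.pth'))
+#   pruned = prepare(model, ratio=0.4)            # the original `model` is left untouched
+#   logits = pruned(torch.randn(1, 1, 32, 32))    # Conv2dTest layers now prune activations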
+
+def getPruneModel(model_name, weight_file_path, pattern, ratio):
+    if model_name == 'LeNet':
+        model_orign = getLeNet()  # build the original model skeleton
+    elif model_name == 'AlexNet':
+        model_orign = getAlexnet()
+
+    if pattern == 'test':
+        model_orign.load_state_dict(torch.load(weight_file_path))  # load the trained weights into the skeleton
+    # Convert the original model into the pruned one, i.e. add the activation-pruning
+    # op to every convolution and linear layer
+    activationPruneModel = prepare(model_orign, ratio)
+
+    return activationPruneModel
+
+def activationPruneModelOp(model_name, weight_file_path, batch_size, img_size, pattern, ratio):
+    '''
+    :param model_name: name of the model to run
+    :param weight_file_path: path to the weight file
+    :param batch_size: batch size used during training/evaluation
+    :param img_size: image size required by the dataset pipeline
+    :param pattern: run mode, either training ('train') or inference ('test')
+    :param ratio: activation-pruning ratio (0 disables pruning)
+    :return:
+    '''
+    if model_name == 'VGG16' or model_name == 'AlexNet' or model_name == 'ResNet' or model_name == 'vgg16_thu' or model_name == 'SqueezeNet':
+        dataloaders, dataset_sizes = load_cifar10(batch_size=batch_size, pth_path='./data',
+                                                  img_size=img_size)  # pick the dataset matching the model
+    elif model_name == 'LeNet':
+        dataloaders, dataset_sizes = load_mnist(batch_size=batch_size, path='./data',
+                                                img_size=img_size)
+
+    activationPruneModel = getPruneModel(model_name, weight_file_path, pattern, ratio)
+    criterion = nn.CrossEntropyLoss()
+    if pattern == 'train':
+        optimizer = optim.SGD(activationPruneModel.parameters(), lr=0.01, momentum=0.9)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)  # learning-rate decay schedule
+        train_model_jiang(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion, optimizer=optimizer, name='SqueezeNet_5',
+                          scheduler=scheduler, num_epochs=30, rerun=False)  # run the training loop
+    elif pattern == 'test':
+        test_model(activationPruneModel, dataloaders, dataset_sizes, criterion=criterion)
diff --git a/Conv2dNew.py b/Conv2dNew.py
new file mode 100644
index 0000000..0513a0f
--- /dev/null
+++ b/Conv2dNew.py
@@ -0,0 +1,181 @@
+import math
+import numpy as np
+import copy
+import torch
+
+def determine_padding(filter_shape, output_shape="same"):
+    '''
+    :param filter_shape: tuple (filter_height, filter_width)
+    :param output_shape: "valid", "same", or an explicit (pad_h, pad_w) pair as used by nn.Conv2d
+    :return: ((pad_h1, pad_h2), (pad_w1, pad_w2)) padding for top/bottom and left/right
+    '''
+    # No padding
+    if output_shape == "valid":
+        return (0, 0), (0, 0)
+    # Pad so that the output shape is the same as input shape (given that stride=1)
+    elif output_shape == "same":
+        filter_height, filter_width = filter_shape
+
+        # Derived from:
+        # output_height = (height + pad_h - filter_height) / stride + 1
+        # In this case output_height = height and stride = 1. This gives the
+        # expression for the padding below.
+        pad_h1 = int(math.floor((filter_height - 1) / 2))
+        pad_h2 = int(math.ceil((filter_height - 1) / 2))
+        pad_w1 = int(math.floor((filter_width - 1) / 2))
+        pad_w2 = int(math.ceil((filter_width - 1) / 2))
+    else:
+        # Explicit symmetric padding, e.g. the (pad_h, pad_w) tuple stored by nn.Conv2d
+        pad_h1 = output_shape[0]
+        pad_h2 = output_shape[0]
+        pad_w1 = output_shape[1]
+        pad_w2 = output_shape[1]
+
+    return (pad_h1, pad_h2), (pad_w1, pad_w2)
+
+def image_to_column(images, filter_shape, stride, output_shape='same'):
+    filter_height, filter_width = filter_shape
+    pad_h, pad_w = determine_padding(filter_shape, output_shape)
+    # Add padding to the image; F.pad takes (left, right, top, bottom) for a 4-D tensor
+    images_padded = torch.nn.functional.pad(images, [pad_w[0], pad_w[1], pad_h[0], pad_h[1]], mode='constant')
+    # Calculate the indices where the dot products are to be applied between weights
+    # and the image
+    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)
+
+    # Get content from image at those indices
+    cols = images_padded[:, k, i, j]
+    channels = images.shape[1]
+    # Reshape content into column shape
+    cols = cols.permute(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
+
+    return cols
+
+def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
+    # First figure out what the size of the output should be
+    batch_size, channels, height, width = images_shape
+    filter_height, filter_width = filter_shape
+    pad_h, pad_w = padding
+    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
+    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)
+
+    i0 = np.repeat(np.arange(filter_height), filter_width)
+    i0 = np.tile(i0, channels)
+    i1 = stride * np.repeat(np.arange(out_height), out_width)
+    j0 = np.tile(np.arange(filter_width), filter_height * channels)
+    j1 = stride * np.tile(np.arange(out_width), out_height)
+    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
+    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
+    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)
+    return (k, i, j)
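+
+# Shape sketch (illustrative): for a batch of one 3x4x4 image, a 3x3 filter and
+# 'same' padding, out_height = out_width = 4, so image_to_column returns a
+# (3*3*3, 4*4*1) = (27, 16) matrix -- one column per output position.
+#
+#   x = torch.arange(48, dtype=torch.float32).reshape(1, 3, 4, 4)
+#   cols = image_to_column(x, (3, 3), stride=1, output_shape='same')
+#   assert cols.shape == (27, 16)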
method """ + self.input_shape = shape + + def layer_name(self): + """ The name of the layer. Used in model summary. """ + return self.__class__.__name__ + + def parameters(self): + """ The number of trainable parameters used by the layer """ + return 0 + + def forward_pass(self, X, training): + """ Propogates the signal forward in the network """ + raise NotImplementedError() + + def backward_pass(self, accum_grad): + """ Propogates the accumulated gradient backwards in the network. + If the has trainable weights then these weights are also tuned in this method. + As input (accum_grad) it receives the gradient with respect to the output of the layer and + returns the gradient with respect to the output of the previous layer. """ + raise NotImplementedError() + + def output_shape(self): + """ The shape of the output produced by forward_pass """ + raise NotImplementedError() + +class Execution(Layer): + """A 2D Convolution Layer. + Parameters: + ----------- + n_filters: int + The number of filters that will convolve over the input matrix. The number of channels + of the output shape. + filter_shape: tuple + A tuple (filter_height, filter_width). + input_shape: tuple + The shape of the expected input of the layer. (batch_size, channels, height, width) + Only needs to be specified for first layer in the network. + padding: string + Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width + matches the input height and width. For 'valid' no padding is added. + stride: int + The stride length of the filters during the convolution over the input. + """ + def __init__(self,ratio): + self.ratio = ratio + pass + + def conv2d(self, input,weight,bias,stride,padding): + self.input = input + self.weight = weight + self.bias = bias + self.stride = stride + self.padding = padding + + self.n_filters = self.weight.shape[0] # 卷积核的个数 + self.filter_shape = (self.weight.shape[2], self.weight.shape[3]) + self.input_shape = [self.input.shape[1],self.input.shape[2],self.input.shape[3]] + self.trainable = False + + batch_size, channels, height, width = self.input.shape + # Turn image shape into column shape + # (enables dot product between input and weights) + self.X_col = image_to_column(self.input, self.filter_shape, stride=self.stride[0], output_shape=self.padding) + # Turn weights into column shape + if self.ratio != 0: + compareRatio = math.ceil(self.ratio * self.X_col.shape[0]) + self.X_col = self.activationSlidePrune(self.X_col,compareRatio) + self.W_col = self.weight.reshape((self.n_filters, -1)) + # Calculate output + output = torch.einsum('ij,jk->ik',self.W_col,self.X_col) + torch.unsqueeze(self.bias,1) + # Reshape into (n_filters, out_height, out_width, batch_size) + output = output.reshape(self.output_shape() + (batch_size, )) + # Redistribute axises so that batch size comes first + return output.permute(3,0,1,2) + + def output_shape(self): + channels, height, width = self.input_shape + pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding) + output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1 + output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[0] + 1 + return self.n_filters, int(output_height), int(output_width) + + def parameters(self): + return np.prod(self.W.shape) + np.prod(self.w0.shape) + + def activationSlidePrune(self,input,compareRatio): + matrixOne = torch.ones(input.shape,device='cuda:0') + andOp = torch.logical_and(matrixOne,input) + andSum = torch.sum(andOp,dim=0) + 
+
+    def output_shape(self):
+        channels, height, width = self.input_shape
+        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
+        # A single stride value is used for both axes (square stride assumed)
+        output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride[0] + 1
+        output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride[0] + 1
+        return self.n_filters, int(output_height), int(output_width)
+
+    def parameters(self):
+        # number of parameters used by this convolution (weights + bias)
+        n = int(np.prod(self.weight.shape))
+        if self.bias is not None:
+            n += int(np.prod(self.bias.shape))
+        return n
+
+    def activationSlidePrune(self, input, compareRatio):
+        # Count the nonzero entries of every im2col column
+        andSum = torch.sum(input != 0, dim=0)
+        # Zero every column whose nonzero count is at most the threshold;
+        # columns that are already all zero are left as they are.
+        pruneMask = (andSum <= compareRatio) & (andSum != 0)
+        input[:, pruneMask] = 0
+        # pruning statistics, if needed:
+        # pruneMask.float().mean() is the fraction of columns pruned here,
+        # (andSum == 0).float().mean() the fraction that was already all zero
+        return input
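+
+# Worked example (illustrative): with ratio = 0.4 and an X_col of 25 rows, the
+# threshold is ceil(0.4 * 25) = 10, so any column with 1..10 nonzero entries is
+# zeroed while denser columns pass through unchanged. Shapes match nn.Conv2d:
+#
+#   E = Execution(0.4)
+#   x = torch.randn(1, 1, 32, 32)
+#   w = torch.randn(6, 1, 5, 5)
+#   b = torch.zeros(6)
+#   y = E.conv2d(x, w, b, stride=(1, 1), padding=(0, 0))
+#   assert y.shape == (1, 6, 28, 28)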
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..18729c0
--- /dev/null
+++ b/main.py
@@ -0,0 +1,11 @@
+from ActivationPrune import *
+
+
+if __name__ == '__main__':
+    weight_file_path = './pth/LeNet/LeNet.pth'
+    model_name = 'LeNet'
+    batch_size = 64
+    img_size = 32
+    pattern = 'test'  # pattern='test' or pattern='train'
+    ratio = 0.4  # ratio=0 disables pruning and is used for the training mode
+    activationPruneModelOp(model_name, weight_file_path, batch_size, img_size, pattern, ratio)
\ No newline at end of file
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..d79cd91
--- /dev/null
+++ b/model.py
@@ -0,0 +1,123 @@
+from collections import OrderedDict
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+import torch.nn.functional as F
+import math
+import torch
+
+class ThuAlexNet(nn.Module):
+    def __init__(self, num_classes=10, drop_rate=0.5):
+        super(ThuAlexNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(64, 192, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(192, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(drop_rate),
+            # nn.Linear(256 * 6 * 6, 4096),
+            nn.Linear(1024, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(drop_rate),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_classes),
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+class AlexNet(nn.Module):
+
+    def __init__(self, num_classes=10):
+        super(AlexNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(96, 256, kernel_size=5, padding=2),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+            nn.Conv2d(256, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 384, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(384, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(),
+            nn.Linear(256 * 6 * 6, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_classes),
+            # nn.Softmax()
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), 256 * 6 * 6)
+        x = self.classifier(x)
+        return x
+
+class LeNet(nn.Module):
+    def __init__(self, num_classes=10):
+        super(LeNet, self).__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(1, 6, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(6, 16, kernel_size=5),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(16, 120, kernel_size=5),
+            nn.ReLU(inplace=True)
+        )
+        self.classifier = nn.Sequential(
+            nn.Linear(120, 84),
+            nn.ReLU(inplace=True),
+            nn.Linear(84, num_classes)
+        )
+
+    def forward(self, x):
+        if hasattr(self, "first_input_prune"):
+            x = self.first_input_prune(x)
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+def getLeNet(num_classes=10):
+    model = LeNet(num_classes)
+    return model
+
+def getAlexnet(num_classes=10):
+    model = AlexNet(num_classes)
+    return model
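+
+# Shape check (illustrative), assuming the 32x32 inputs configured in main.py:
+# LeNet's features map 1x32x32 -> 6x28x28 -> 6x14x14 -> 16x10x10 -> 16x5x5 ->
+# 120x1x1, so view() flattens to 120 features, matching the Linear(120, 84) layer.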
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..4135d5f
--- /dev/null
+++ b/train.py
@@ -0,0 +1,283 @@
+from __future__ import print_function, division
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.optim import lr_scheduler
+import numpy as np
+import torchvision
+from torchvision import datasets, models, transforms
+import matplotlib.pyplot as plt
+import time
+import os
+import copy
+from tqdm import tqdm
+from collections import OrderedDict
+
+def download_mnist(save_path):
+    torchvision.datasets.MNIST(root=save_path, train=True, download=True)
+    torchvision.datasets.MNIST(root=save_path, train=False, download=True)
+    return save_path
+
+def load_mnist(batch_size=64, path='', img_size=32):
+    # Resize to img_size; MNIST needs no further augmentation here
+    transform = transforms.Compose(
+        [transforms.Resize((img_size, img_size)),
+         transforms.ToTensor()])
+    test_transform = transforms.Compose(
+        [transforms.Resize((img_size, img_size)),
+         transforms.ToTensor()])
+    trainset = torchvision.datasets.MNIST(root=path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.MNIST(root=path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 60000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar10(save_path):
+    torchvision.datasets.CIFAR10(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR10(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar10(batch_size=64, pth_path='./data', img_size=32):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    else:
+        # Standard CIFAR-10 augmentation at the native 32x32 resolution
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR10(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR10(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def download_cifar100(save_path):
+    torchvision.datasets.CIFAR100(root=save_path, train=True, download=True)
+    torchvision.datasets.CIFAR100(root=save_path, train=False, download=True)
+    return save_path
+
+def load_cifar100(batch_size, pth_path, img_size):
+    if img_size != 32:
+        transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+        test_transform = transforms.Compose(
+            [transforms.Resize((img_size, img_size)),
+             transforms.ToTensor()])
+    else:
+        transform = transforms.Compose([transforms.Pad(padding=4),
+                                        transforms.RandomCrop(32),
+                                        transforms.RandomHorizontalFlip(), transforms.ToTensor()])
+        test_transform = transforms.Compose([transforms.ToTensor()])
+    trainset = torchvision.datasets.CIFAR100(root=pth_path, train=True, download=False, transform=transform)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
+    testset = torchvision.datasets.CIFAR100(root=pth_path, train=False, download=False, transform=test_transform)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
+    dataloaders = {"train": trainloader, "val": testloader}
+    dataset_sizes = {"train": 50000, "val": 10000}
+    return dataloaders, dataset_sizes
+
+def test_model(model, dataloaders, dataset_sizes, criterion):
+    print("validation model:")
+    phase = "val"
+    model.cuda()
+    model.eval()
+    with torch.no_grad():
+        running_loss = 0.0
+        running_acc = 0.0
+        for inputs, labels in tqdm(dataloaders[phase]):
+            inputs, labels = inputs.cuda(), labels.cuda()
+            outputs = model(inputs)
+            _, preds = torch.max(outputs, 1)
+            loss = criterion(outputs, labels)
+            running_loss += loss.item() * inputs.size(0)
+            running_acc += torch.sum(preds == labels.data)
+        epoch_loss = running_loss / dataset_sizes[phase]
+        epoch_acc = running_acc / dataset_sizes[phase]
+        epoch_acc = epoch_acc.item()
+        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+            phase, epoch_loss, epoch_acc))
+    return epoch_acc, epoch_loss
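+
+# Usage sketch (illustrative; assumes MNIST has already been downloaded to ./data
+# and that a trained model instance is available):
+#   dataloaders, dataset_sizes = load_mnist(batch_size=64, path='./data', img_size=32)
+#   acc, loss = test_model(model, dataloaders, dataset_sizes, nn.CrossEntropyLoss())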
+
+def train_model_jiang(model, dataloaders, dataset_sizes, criterion, optimizer, name, scheduler=None, num_epochs=100, rerun=False):
+    since = time.time()
+    start_epoch = 0
+    if rerun:
+        # Resume training from the epoch-20 checkpoint
+        print('resuming from ./test_20.pth')
+        print(num_epochs)
+        model.load_state_dict(torch.load('./test_20.pth'))
+        start_epoch = 20
+
+    best_model_wts = copy.deepcopy(model.state_dict())
+    best_acc = 0.0
+
+    model.cuda()
+    for epoch in range(start_epoch, num_epochs):
+        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
+        print('-' * 10)
+        print('the %d lr:%f' % (epoch + 1, optimizer.param_groups[0]['lr']))
+
+        # Each epoch has a training and validation phase
+        for phase in ['train', 'val']:
+            if phase == 'train':
+                model.train()  # Set model to training mode
+            else:
+                print('val stage')
+                model.eval()  # Set model to evaluate mode
+
+            running_loss = 0.0
+            running_corrects = 0
+
+            # Iterate over data.
+            i = 0
+            for data in dataloaders[phase]:
+                inputs, labels = data
+                inputs = inputs.cuda()
+                labels = labels.cuda()
+
+                # zero the parameter gradients
+                optimizer.zero_grad()
+
+                # forward
+                # track history only if in train
+                with torch.set_grad_enabled(phase == 'train'):
+                    outputs = model(inputs)
+                    _, preds = torch.max(outputs, 1)
+                    loss = criterion(outputs, labels)
+                    print('[%d ,%5d] loss:%.3f' % (epoch + 1, i + 1, loss.item()))
+                    i += 1
+                    # backward + optimize only if in training phase
+                    if phase == 'train':
+                        loss.backward()
+                        optimizer.step()
+
+                # statistics
+                running_loss += loss.item() * inputs.size(0)
+                running_corrects += torch.sum(preds == labels.data)
+            if phase == 'train' and scheduler is not None:
+                scheduler.step()
+
+            epoch_loss = running_loss / dataset_sizes[phase]
+            epoch_acc = running_corrects.double() / dataset_sizes[phase]
+
+            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+                phase, epoch_loss, epoch_acc))
+
+            # deep copy the model and checkpoint whenever validation accuracy improves
+            if phase == 'val' and epoch_acc > best_acc:
+                best_acc = epoch_acc
+                best_model_wts = copy.deepcopy(model.state_dict())
+                model.load_state_dict(best_model_wts)
+                path = './test_{}.pth'.format(epoch + 1)
+                torch.save(model.state_dict(), path)
+
+    time_elapsed = time.time() - since
+    print('Training complete in {:.0f}m {:.0f}s'.format(
+        time_elapsed // 60, time_elapsed % 60))
+    print('Best val Acc: {:4f}'.format(best_acc))
+
+    # load best model weights and save the final checkpoint
+    model.load_state_dict(best_model_wts)
+    path = './best.pth' if rerun else './pth/Lenet/LeNet.pth'
+    torch.save(model.state_dict(), path)
+    return model
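+
+# Usage sketch (illustrative; the argument values mirror ActivationPrune.py):
+#   optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+#   scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)
+#   train_model_jiang(model, dataloaders, dataset_sizes, criterion=nn.CrossEntropyLoss(),
+#                     optimizer=optimizer, name='LeNet', scheduler=scheduler,
+#                     num_epochs=30, rerun=False)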
diff --git "a/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md" "b/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md"
new file mode 100644
index 0000000..c761fb1
--- /dev/null
+++ "b/\350\213\245\345\271\262\346\226\207\344\273\266\343\200\201\345\207\275\346\225\260\344\270\216\345\207\275\346\225\260\345\217\202\346\225\260\347\232\204\350\257\264\346\230\216.md"
@@ -0,0 +1,23 @@
+# I. main.py
+## 1. main function parameters
+1. weight\_file\_path - path of the weight file to read.
+2. model_name - name of the model to run.
+3. batch_size - number of images processed per batch.
+4. img_size - size that each image is resized to for the dataset.
+5. pattern - run mode:
+   (1) pattern='test' measures the accuracy after the input feature maps are pruned; no model training is involved.
+   (2) pattern='train' trains the initial model; the input feature maps are not pruned, so the next parameter, ratio, should be set to 0.
+6. ratio - pruning rate. After the input feature map is unfolded into an m*n matrix, each column is pruned individually, and ratio determines the pruning threshold. With ratio=0.2, for example, any column whose number of nonzero values is <= ratio\*m is pruned, i.e. its data is zeroed out.
+# II. Conv2dNew.py
+## 1. Purpose of Conv2dNew.py
+This design currently focuses on pruning the input feature maps of the convolution layers. In the original training code, the img2col unfolding of the input feature map is implemented in C and invoked as low-level code, so the convolution had to be re-implemented here, i.e. the F.conv2d() function is reproduced.
+## 2. The determine_padding function
+Pads the original input feature map according to the given padding mode or padding values.
+## 3. The image\_to_column function
+Performs the img2col unfolding of the input feature map.
+## 4. The get_im2col_indices function
+Helper that determines the size of the matrix produced by img2col of the input feature map.
+## 5. The Layer class
+Layer is a generic base class; different layers, such as convolution, pooling and batch normalization, can be implemented by inheriting from it.
+## 6. The Execution class
+This class holds the per-layer operations; here it mainly implements the convolution, via its conv2d method.
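+## 7. Worked example of the pruning threshold
+A minimal sketch of the column-pruning rule (the numbers are illustrative, not taken from the code). For ratio=0.4 and an unfolded matrix with m=25 rows, the threshold is ceil(0.4\*25)=10, so every column with 1 to 10 nonzero entries is zeroed:
+```python
+import math
+import torch
+
+ratio = 0.4
+X_col = torch.randn(25, 16)        # m=25 rows, one column per output position
+X_col[5:, :8] = 0                  # make the first 8 columns sparse (5 nonzeros each)
+threshold = math.ceil(ratio * X_col.shape[0])    # ceil(0.4 * 25) = 10
+nonzeros = (X_col != 0).sum(dim=0)
+X_col[:, (nonzeros <= threshold) & (nonzeros != 0)] = 0
+assert (X_col[:, :8] == 0).all()   # sparse columns pruned; dense ones kept
+```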