|
| 1 | +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. |
| 2 | +
|
| 3 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +you may not use this file except in compliance with the License. |
| 5 | +You may obtain a copy of the License at |
| 6 | +
|
| 7 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +
|
| 9 | +Unless required by applicable law or agreed to in writing, software |
| 10 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +See the License for the specific language governing permissions and |
| 13 | +limitations under the License. */ |
| 14 | + |
| 15 | +#include "Layer.h" |
| 16 | +#include "NormLayer.h" |
| 17 | +#include "paddle/math/BaseMatrix.h" |
| 18 | +#include "paddle/math/Matrix.h" |
| 19 | + |
| 20 | +namespace paddle { |
| 21 | + |
| 22 | +MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data, |
| 23 | + size_t iter, |
| 24 | + size_t spatialDim) { |
| 25 | + return Matrix::create(data->getData() + iter * channels_ * spatialDim, |
| 26 | + channels_, |
| 27 | + spatialDim, |
| 28 | + false, |
| 29 | + useGpu_); |
| 30 | +} |
| 31 | + |
| 32 | +MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data, |
| 33 | + size_t iter, |
| 34 | + size_t spatialDim) { |
| 35 | + return Matrix::create( |
| 36 | + data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_); |
| 37 | +} |
| 38 | + |
| 39 | +void CrossChannelNormLayer::forward(PassType passType) { |
| 40 | + Layer::forward(passType); |
| 41 | + MatrixPtr inV = getInputValue(0); |
| 42 | + |
| 43 | + size_t batchSize = inV->getHeight(); |
| 44 | + size_t dataDim = inV->getWidth(); |
| 45 | + CHECK_EQ(getSize(), dataDim); |
| 46 | + |
| 47 | + reserveOutput(batchSize, dataDim); |
| 48 | + MatrixPtr outV = getOutputValue(); |
| 49 | + size_t spatialDim = dataDim / channels_; |
| 50 | + |
| 51 | + Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_); |
| 52 | + Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_); |
| 53 | + Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_); |
| 54 | + normBuffer_->zeroMem(); |
| 55 | + // add eps to avoid overflow |
| 56 | + normBuffer_->addScalar(*normBuffer_, 1e-6); |
| 57 | + inV->square2(*dataBuffer_); |
| 58 | + for (size_t i = 0; i < batchSize; i++) { |
| 59 | + const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim); |
| 60 | + const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim); |
| 61 | + MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim); |
| 62 | + MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim); |
| 63 | + |
| 64 | + // compute norm. |
| 65 | + spatialBuffer_->sumCols(*dataTmp, 1, 0); |
| 66 | + spatialBuffer_->sqrt2(*spatialBuffer_); |
| 67 | + normTmp->copyFrom(*spatialBuffer_); |
| 68 | + outVTmp->copyFrom(*inVTmp); |
| 69 | + outVTmp->divRowVector(*spatialBuffer_); |
| 70 | + // scale the layer. |
| 71 | + outVTmp->mulColVector(*scale_->getW()); |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +void CrossChannelNormLayer::backward(const UpdateCallback& callback) { |
| 76 | + MatrixPtr inG = getInputGrad(0); |
| 77 | + MatrixPtr inV = getInputValue(0); |
| 78 | + MatrixPtr outG = getOutputGrad(); |
| 79 | + MatrixPtr outV = getOutputValue(); |
| 80 | + |
| 81 | + size_t batchSize = inG->getHeight(); |
| 82 | + size_t dataDim = inG->getWidth(); |
| 83 | + size_t spatialDim = dataDim / channels_; |
| 84 | + |
| 85 | + dataBuffer_->dotMul(*outG, *outV); |
| 86 | + Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_); |
| 87 | + Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_); |
| 88 | + Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_); |
| 89 | + scaleDiff_->zeroMem(); |
| 90 | + for (size_t i = 0; i < batchSize; i++) { |
| 91 | + MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim); |
| 92 | + const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim); |
| 93 | + const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim); |
| 94 | + const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim); |
| 95 | + const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim); |
| 96 | + |
| 97 | + channelBuffer_->sumRows(*dataTmp, 1, 0); |
| 98 | + channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW())); |
| 99 | + // store a / scale[i] in scaleDiff_ temporary |
| 100 | + scaleDiff_->add(*channelBuffer_, 1.); |
| 101 | + |
| 102 | + sampleBuffer_->dotMul(*inVTmp, *outGTmp); |
| 103 | + spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.); |
| 104 | + // scale the grad |
| 105 | + inGTmp->copyFrom(*inVTmp); |
| 106 | + inGTmp->mulRowVector(*spatialBuffer_); |
| 107 | + // divide by square of norm |
| 108 | + spatialBuffer_->dotMul(*normTmp, *normTmp); |
| 109 | + inGTmp->divRowVector(*spatialBuffer_); |
| 110 | + // subtract |
| 111 | + inGTmp->add(*outGTmp, -1, 1); |
| 112 | + // divide by norm |
| 113 | + inGTmp->divRowVector(*normTmp); |
| 114 | + // scale the diff |
| 115 | + inGTmp->mulColVector(*scale_->getW()); |
| 116 | + } |
| 117 | + // updata scale |
| 118 | + if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_); |
| 119 | + scale_->getParameterPtr()->incUpdate(callback); |
| 120 | +} |
| 121 | + |
| 122 | +} // namespace paddle |
0 commit comments