/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "ExpandConvBaseLayer.h"

#include "paddle/utils/Logging.h"

namespace paddle {

bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
                               const ParameterMap &parameterMap) {
  /* Initialize the basic convolutional parent class */
  ConvBaseLayer::init(layerMap, parameterMap);

  /* The class fields channels_ and numFilters_ are the same as in the config,
   * i.e., channels_ is for the input and numFilters_ is for the output.
   *
   * But in order for the variables in convTrans to have the same semantic
   * meaning as in conv, we need to swap channels_ and numFilters_ here for
   * convTrans, and in other functions too.
   */
  int channel;
  int numFilters;
  /* Initialize the projection */
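  /* A note on the per-group GEMM dimensions computed below (inferred from
   * how subM_/subN_/subK_ are used in expandFwdOnce and the bprop methods):
   *   subM = numFilters / groups   -- output maps per group
   *   subN = outputH * outputW     -- output positions per sample
   *   subK = channels * filterSize^2 / groups
   *                                -- input values in one filter window
   * so each group multiplies a (subM x subK) filter slice with a
   * (subK x subN) im2col matrix to produce its subM output maps. */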
  for (auto &inputConfig : config_.inputs()) {
    const ConvConfig &conf = inputConfig.conv_conf();
    numFilters = isDeconv_ ? conf.channels() : numFilters_;
    subM_.push_back(numFilters / conf.groups());
    subN_.push_back(conf.output_x() * conf.output_x());
    channel = isDeconv_ ? numFilters_ : conf.channels();
    subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
                    conf.groups());
    /* Use a consistent caffe mode across all inputs */
    caffeMode_ = conf.caffe_mode();
  }

  getOutputSize();

  return true;
}

size_t ExpandConvBaseLayer::getOutputSize() {
  CHECK_NE(inputLayers_.size(), 0UL);
  size_t layerSize = ConvBaseLayer::calOutputSize();
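  /* Recompute subN_ now that calOutputSize() has filled in the per-input
   * outputH_/outputW_, which may differ from the config's output_x. */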
  subN_.clear();
  for (size_t i = 0; i < inputLayers_.size(); i++) {
    subN_.push_back(outputH_[i] * outputW_[i]);
  }
  return layerSize;
}

void ExpandConvBaseLayer::resetExpandInput(size_t height, size_t width) {
  Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}

void ExpandConvBaseLayer::addSharedBias() {
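  /* With a shared bias there is one bias value per filter. The transpose and
   * reshape below rearrange the output, laid out as
   * (batch, numFilters * mapSize), into a (mapSize * batch, numFilters)
   * matrix in which column f holds every value produced by filter f, so a
   * 1 x numFilters bias row can be broadcast-added in one addBias() call. */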
  size_t mapW = getOutputSize() / numFilters_;
  size_t mapH = getOutputValue()->getElementCnt() / mapW;
  MatrixPtr out =
      Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  out->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
                          numFilters_);

  MatrixPtr bias =
      Matrix::create(biases_->getW()->getData(), 1,
                     biases_->getW()->getElementCnt(), false, useGpu_);
  transOutValue_->addBias(*bias, 1.0f);

  transOutValue_->reshape(mapW, mapH);
  transOutValue_->transpose(out, false);  // false means no memory allocation

  out->clear();
  bias->clear();
}

void ExpandConvBaseLayer::addUnsharedBias() {
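  /* Without bias sharing there is one bias value per output element, so the
   * 1 x layerSize bias row can be added directly to every row (sample) of
   * the output. */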
  MatrixPtr outValue = getOutputValue();
  MatrixPtr bias =
      Matrix::create(biases_->getW()->getData(), 1,
                     biases_->getW()->getElementCnt(), false, useGpu_);
  outValue->addBias(*bias, 1.0f);
}

void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                         int inIdx) {
  int channel = isDeconv_ ? numFilters_ : channels_[inIdx];

  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
  real *imgData = image->getData() + startIdx * image->getWidth();
  MatrixPtr imageTmp = Matrix::create(
      imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false,
      useGpu_);
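  /* convExpand performs an im2col-style expansion: each of the subN columns
   * of expandInput_ gathers the subK * groups input values covered by the
   * filter window at one output position, so the convolution reduces to a
   * plain matrix multiply per group in expandFwdOnce(). */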
  expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
                           channel, filterSize_[inIdx],
                           filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
                           padding_[inIdx], padding_[inIdx],
                           outputH_[inIdx], outputW_[inIdx]);
  imageTmp->clear();
}

void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out,
                                        int inIdx, int startIdx) {
  int subM = subM_[inIdx];
  int subN = subN_[inIdx];
  int subK = subK_[inIdx];

  expandOneFrame(image, startIdx, inIdx);

  int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_;

  real *outData = out->getData() + startIdx * subN * numFilters;

  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
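  /* One GEMM per group: C (subM x subN) += A^T (subM x subK) * B (subK x subN),
   * i.e. the group's filter slice times its im2col columns. A is created
   * with trans = true because the weights are stored (subK x subM) but
   * multiplied as (subM x subK). */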
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
    C->mul(A, B, 1, 1);

    A->clear();
    B->clear();
    C->clear();
    wgtData += subK * subM;
    expInData += subK * subN;
    outData += subM * subN;
  }
}

void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
                                    int inpIdx) {
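  /* Backward pass w.r.t. the activations: for each frame, compute the
   * gradient of the expanded (im2col) buffer as W * outGrad per group, then
   * let convShrink -- the col2im counterpart of convExpand -- accumulate the
   * overlapping filter windows back into the input-image gradient. */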
  int channel = isDeconv_ ? numFilters_ : channels_[inpIdx];

  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];
  size_t batchSize = image->getHeight();

  /* reset the expand-grad memory */
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *localGradData = out->getData();
  real *tgtGradData = image->getData();
  for (size_t n = 0; n < batchSize; n++) {
    real *wgtData = weights_[inpIdx]->getW()->getData();
    real *expandInData = expandInput_->getData();

    for (int g = 0; g < groups_[inpIdx]; g++) {
      // create temporary matrices over the shared buffers
      MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
      C->mul(A, B);  // expanded gradient = W * outGrad

      // clear the temporary matrices
      A->clear();
      B->clear();
      C->clear();

      expandInData += subK * subN;
      localGradData += subM * subN;
      wgtData += subK * subM;
    }

    // shrink one frame's outGrad back into the image gradient
    MatrixPtr oneGradTmp = Matrix::create(
        expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
    MatrixPtr vTmp = Matrix::create(
        tgtGradData, 1, imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false,
        useGpu_);
    vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
                     channel, filterSize_[inpIdx],
                     filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
                     padding_[inpIdx], padding_[inpIdx],
                     outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
    vTmp->clear();
    oneGradTmp->clear();

    // advance the data pointer to the next frame
    tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel;
  }
}

void ExpandConvBaseLayer::bpropWeights(MatrixPtr image, MatrixPtr out,
                                       int inpIdx) {
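  /* Gradient w.r.t. the weights: for each frame, re-expand the input with
   * im2col, then accumulate per group
   *   wGrad (subK x subM) += expandedInput (subK x subN) * outGrad^T (subN x subM),
   * which is why B is created with trans = true below. */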
  MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();

  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];
  size_t batchSize = image->getHeight();
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *gradData = out->getData();

  for (size_t n = 0; n < batchSize; n++) {  // frame by frame
    // expand
    expandOneFrame(image, n, inpIdx);
    real *wGradData = weightGrad->getData();
    real *expandInData = expandInput_->getData();

    // expand-mul one group at a time
    for (int g = 0; g < groups_[inpIdx]; g++) {
      MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_);
      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
      C->mul(A, B, 1, 1);

      A->clear();
      B->clear();
      C->clear();
      gradData += subM * subN;
      wGradData += subK * subM;
      expandInData += subK * subN;
    }
  }
}

void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
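  /* Same transpose/reshape trick as addSharedBias(): rearrange the output
   * gradient so that column f holds every gradient produced by filter f,
   * then collectBias() reduces each column into the shared per-filter bias
   * gradient. */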
  size_t mapW = getOutputSize() / numFilters_;
  size_t mapH = v->getElementCnt() / mapW;
  MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  vTmp->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
                          numFilters_);
  biases->collectBias(*transOutValue_, 1.0f);
}

void ExpandConvBaseLayer::bpropBiases(MatrixPtr v) {
  MatrixPtr biases =
      Matrix::create(biases_->getWGrad()->getData(), 1,
                     biases_->getWGrad()->getElementCnt(), false, useGpu_);
  if (sharedBiases_) {
    bpropSharedBias(biases, v);
  } else {
    biases->collectBias(*v, 1.0f);
  }
  biases->clear();
}

}  // namespace paddle