
Commit 976f96a

Merge pull request #5926 from peterzhang2029/hsigmoid_gpu
Fix hsigmoid_layer when using GPU.
2 parents: c975fe1 + b156c6a

3 files changed: +140 −24 lines

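In short: the hsigmoid layer's bit-code operations (addByBitCode, mulByBitCode, sumByBitCode, and their backward counterparts) operate on CPU data, which is why the layer previously broke with useGpu=true. This change stages every operand through host-side buffers — labels, inputs, weights, and biases are copied to CPU memory, the bit-code arithmetic runs there, and outputs and gradients are copied back to the device — and extends the layer-gradient test to cover both devices.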

paddle/gserver/layers/HierarchicalSigmoidLayer.cpp

Lines changed: 124 additions & 18 deletions
@@ -64,49 +64,111 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
+                         false);
   Matrix::resizeOrCreate(preOutput_.grad,
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
-
+                         false);
   IVectorPtr label = getInput(*getLabelLayer()).ids;
-
   preOutput_.value->zeroMem();

+  if (useGpu_) {
+    Matrix::resizeOrCreate(cpuOutput_,
+                           output_.value->getHeight(),
+                           output_.value->getWidth(),
+                           /* trans */ false,
+                           false);
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+    cpuOutput_->copyFrom(*output_.value);
+  } else {
+    cpuOutput_ = output_.value;
+    cpuLabel_ = label;
+  }
   /* add the bias-vector */
   if (biases_.get() != NULL) {
-    preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW());
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_->getW());
+    } else {
+      cpuBias_ = biases_->getW();
+    }
+    preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_);
   }
   for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
     MatrixPtr input = getInputValue(i);
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuInput_,
+                             input->getHeight(),
+                             input->getWidth(),
+                             /* trans */ false,
+                             false);
+      Matrix::resizeOrCreate(cpuWeight_,
+                             weights_[i]->getW()->getHeight(),
+                             weights_[i]->getW()->getWidth(),
+                             /* trans */ false,
+                             false);
+      cpuInput_->copyFrom(*input);
+      cpuWeight_->copyFrom(*weights_[i]->getW());
+    } else {
+      cpuInput_ = input;
+      cpuWeight_ = weights_[i]->getW();
+    }
     preOutput_.value->mulByBitCode(
-        numClasses_, *label, *weights_[i]->getW(), *input);
+        numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_);
   }
   // keep consistent with the clipping in the following softrelu
   preOutput_.value->clip(-40.0, 40.0);
   preOutput_.value->sumByBitCode(numClasses_,
-                                 *label,
-                                 *output_.value,
+                                 *cpuLabel_,
+                                 *cpuOutput_,
                                  -1);  // scaleSum
   preOutput_.value->softrelu(*preOutput_.value);
-  MatrixPtr sum =
-      Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
+  MatrixPtr sum = Matrix::create(batchSize, 1, /* trans= */ false, false);
   preOutput_.value->rowSum(*sum);
-  output_.value->add(*sum);
+  cpuOutput_->add(*sum);
+  if (useGpu_) {
+    output_.value->copyFrom(*cpuOutput_);
+  } else {
+    output_.value = cpuOutput_;
+  }
 }

 void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
   IVectorPtr label = getInput(*getLabelLayer()).ids;
+  if (useGpu_) {
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+  } else {
+    cpuLabel_ = label;
+  }
   preOutput_.grad->one();
   preOutput_.grad->softreluDerivative(*preOutput_.value);
-  preOutput_.grad->subByBitCode(numClasses_, *label);
+  preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_);

   if (biases_ && biases_->getWGrad()) {
-    preOutput_.grad->addByBitCodeBackward(
-        numClasses_, *label, *biases_->getWGrad());
-
+    MatrixPtr biases_grad = biases_->getWGrad();
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_grad);
+    } else {
+      cpuBias_ = biases_grad;
+    }
+    preOutput_.grad->addByBitCodeBackward(numClasses_, *cpuLabel_, *cpuBias_);
+    if (useGpu_) {
+      biases_grad->copyFrom(*cpuBias_);
+    } else {
+      biases_grad = cpuBias_;
+    }
     /* Increasing the number of gradient */
     biases_->getParameterPtr()->incUpdate(callback);
   }
@@ -115,18 +177,62 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
     /* Calculate the W-gradient for the current layer */
     MatrixPtr input = getInputValue(i);
     if (weights_[i]->getWGrad()) {
+      MatrixPtr weights_grad = weights_[i]->getWGrad();
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInput_,
+                               input->getHeight(),
+                               input->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeightGrad_,
+                               weights_grad->getHeight(),
+                               weights_grad->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInput_->copyFrom(*input);
+        cpuWeightGrad_->copyFrom(*weights_grad);
+      } else {
+        cpuInput_ = input;
+        cpuWeightGrad_ = weights_grad;
+      }
       preOutput_.grad->mulByBitCodeBackwardWeight(
-          numClasses_, *label, *weights_[i]->getWGrad(), *input);
-
+          numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
+      if (useGpu_) {
+        weights_grad->copyFrom(*cpuWeightGrad_);
+      } else {
+        weights_grad = cpuWeightGrad_;
+      }
       /* Increasing the number of gradient */
       weights_[i]->getParameterPtr()->incUpdate(callback);
     }

     /* Calculate the input layers error */
     MatrixPtr inputGrad = getInputGrad(i);
     if (inputGrad) {
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInputGrad_,
+                               inputGrad->getHeight(),
+                               inputGrad->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeight_,
+                               weights_[i]->getW()->getHeight(),
+                               weights_[i]->getW()->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInputGrad_->copyFrom(*inputGrad);
+        cpuWeight_->copyFrom(*weights_[i]->getW());
+      } else {
+        cpuInputGrad_ = inputGrad;
+        cpuWeight_ = weights_[i]->getW();
+      }
       preOutput_.grad->mulByBitCodeBackwardError(
-          numClasses_, *label, *weights_[i]->getW(), *inputGrad);
+          numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
+      if (useGpu_) {
+        inputGrad->copyFrom(*cpuInputGrad_);
+      } else {
+        inputGrad = cpuInputGrad_;
+      }
     }
   }
 }
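The same stage-in / compute / copy-back pattern recurs above for every operand of the bit-code kernels. A minimal sketch of that pattern, assuming the Paddle Matrix API used in the diff; stageToCpu is a hypothetical helper for illustration only, not part of this commit:

// Hypothetical helper showing the CPU-staging pattern this commit applies.
// Assumes paddle::Matrix / MatrixPtr (paddle/math/Matrix.h).
void stageToCpu(const MatrixPtr& src, MatrixPtr& cpuBuf, bool useGpu) {
  if (useGpu) {
    // Allocate (or reuse) a host buffer matching src's shape,
    // then copy device memory to host.
    Matrix::resizeOrCreate(cpuBuf,
                           src->getHeight(),
                           src->getWidth(),
                           /* trans */ false,
                           /* useGpu */ false);
    cpuBuf->copyFrom(*src);
  } else {
    // Already in host memory: alias it, no copy needed.
    cpuBuf = src;
  }
}

For values the layer writes (output_.value, weight and input gradients), the inverse copy — dst->copyFrom(*cpuBuf) — runs after the bit-code kernel so the device buffers see the results.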

paddle/gserver/layers/HierarchicalSigmoidLayer.h

Lines changed: 9 additions & 0 deletions
@@ -80,6 +80,15 @@ class HierarchicalSigmoidLayer : public Layer {
   int codeLength_;
   /// temporary result of output_
   Argument preOutput_;
+
+  /// The temporary variables in CPU memory.
+  MatrixPtr cpuWeight_;
+  MatrixPtr cpuWeightGrad_;
+  MatrixPtr cpuInput_;
+  MatrixPtr cpuInputGrad_;
+  MatrixPtr cpuBias_;
+  MatrixPtr cpuOutput_;
+  IVectorPtr cpuLabel_;
 };

 }  // namespace paddle
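Declaring these buffers as class members rather than stack locals presumably lets Matrix::resizeOrCreate reuse the host allocations across minibatches (allocating only when a pointer is still null, resizing otherwise), so the per-batch cost on the GPU path is the copies rather than repeated allocation. On the CPU path the members simply alias the layer's existing matrices, so no extra memory is consumed.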

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 7 additions & 6 deletions
@@ -681,12 +681,13 @@ TEST(Layer, hsigmoidLayer) {
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();

-  // Not support GPU now
-  testLayerGrad(config,
-                "hsigmoid",
-                100,
-                /* trans */ false, /* useGpu */
-                false);
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config,
+                  "hsigmoid",
+                  100,
+                  /* trans */ false,
+                  /* useGpu */ useGpu);
+  }
 }

 TEST(Layer, multi_cross) {
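With the "Not support GPU now" restriction removed, testLayerGrad runs the numerical gradient check for hsigmoid on both devices, which is what exercises the CPU-staging paths added above. Assuming a standard gtest setup, the check can be run in isolation with something like --gtest_filter=Layer.hsigmoidLayer on the test_LayerGrad binary (the exact binary path depends on the build configuration).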
