
Commit cda3a77

Bug fix when using hsigmoid with GPU

1 parent 54b3994 · commit cda3a77

File tree: 2 files changed, +134 −16 lines

paddle/gserver/layers/HierarchicalSigmoidLayer.cpp

Lines changed: 124 additions & 16 deletions
@@ -64,49 +64,113 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
+                         false);
   Matrix::resizeOrCreate(preOutput_.grad,
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
-
+                         false);
   IVectorPtr label = getInput(*getLabelLayer()).ids;
-
   preOutput_.value->zeroMem();

+  if (useGpu_) {
+    Matrix::resizeOrCreate(cpuOutput_,
+                           output_.value->getHeight(),
+                           output_.value->getWidth(),
+                           /* trans */ false,
+                           false);
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+    cpuOutput_->copyFrom(*output_.value);
+  } else {
+    cpuOutput_ = output_.value;
+    cpuLabel_ = label;
+  }
   /* add the bias-vector */
   if (biases_.get() != NULL) {
-    preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW());
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_->getW());
+    } else {
+      cpuBias_ = biases_->getW();
+    }
+    preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_);
   }
   for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
     MatrixPtr input = getInputValue(i);
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuInput_,
+                             input->getHeight(),
+                             input->getWidth(),
+                             /* trans */ false,
+                             false);
+      Matrix::resizeOrCreate(cpuWeight_,
+                             weights_[i]->getW()->getHeight(),
+                             weights_[i]->getW()->getWidth(),
+                             /* trans */ false,
+                             false);
+      cpuInput_->copyFrom(*input);
+      cpuWeight_->copyFrom(*weights_[i]->getW());
+    } else {
+      cpuInput_ = input;
+      cpuWeight_ = weights_[i]->getW();
+    }
     preOutput_.value->mulByBitCode(
-        numClasses_, *label, *weights_[i]->getW(), *input);
+        numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_);
   }
   // keep consistent with the clipping in the following softrelu
   preOutput_.value->clip(-40.0, 40.0);
   preOutput_.value->sumByBitCode(numClasses_,
-                                 *label,
-                                 *output_.value,
+                                 *cpuLabel_,
+                                 *cpuOutput_,
                                  -1);  // scaleSum
   preOutput_.value->softrelu(*preOutput_.value);
   MatrixPtr sum =
-      Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
+      Matrix::create(batchSize, 1, /* trans= */ false, false);
   preOutput_.value->rowSum(*sum);
-  output_.value->add(*sum);
+  cpuOutput_->add(*sum);
+  if (useGpu_) {
+    output_.value->copyFrom(*cpuOutput_);
+  } else {
+    output_.value = cpuOutput_;
+  }
 }

 void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
   IVectorPtr label = getInput(*getLabelLayer()).ids;
+  if (useGpu_) {
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+  } else {
+    cpuLabel_ = label;
+  }
   preOutput_.grad->one();
   preOutput_.grad->softreluDerivative(*preOutput_.value);
-  preOutput_.grad->subByBitCode(numClasses_, *label);
+  preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_);

   if (biases_ && biases_->getWGrad()) {
+    MatrixPtr biases_grad = biases_->getWGrad();
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_grad);
+    } else {
+      cpuBias_ = biases_grad;
+    }
     preOutput_.grad->addByBitCodeBackward(
-        numClasses_, *label, *biases_->getWGrad());
-
+        numClasses_, *cpuLabel_, *cpuBias_);
+    if (useGpu_) {
+      biases_grad->copyFrom(*cpuBias_);
+    } else {
+      biases_grad = cpuBias_;
+    }
     /* Increasing the number of gradient */
     biases_->getParameterPtr()->incUpdate(callback);
   }
@@ -115,18 +179,62 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
     /* Calculate the W-gradient for the current layer */
     MatrixPtr input = getInputValue(i);
     if (weights_[i]->getWGrad()) {
+      MatrixPtr weights_grad = weights_[i]->getWGrad();
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInput_,
+                               input->getHeight(),
+                               input->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeightGrad_,
+                               weights_grad->getHeight(),
+                               weights_grad->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInput_->copyFrom(*input);
+        cpuWeightGrad_->copyFrom(*weights_grad);
+      } else {
+        cpuInput_ = input;
+        cpuWeightGrad_ = weights_grad;
+      }
       preOutput_.grad->mulByBitCodeBackwardWeight(
-          numClasses_, *label, *weights_[i]->getWGrad(), *input);
-
+          numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
+      if (useGpu_) {
+        weights_grad->copyFrom(*cpuWeightGrad_);
+      } else {
+        weights_grad = cpuWeightGrad_;
+      }
       /* Increasing the number of gradient */
       weights_[i]->getParameterPtr()->incUpdate(callback);
     }

     /* Calculate the input layers error */
     MatrixPtr inputGrad = getInputGrad(i);
     if (inputGrad) {
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInputGrad_,
+                               inputGrad->getHeight(),
+                               inputGrad->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeight_,
+                               weights_[i]->getW()->getHeight(),
+                               weights_[i]->getW()->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInputGrad_->copyFrom(*inputGrad);
+        cpuWeight_->copyFrom(*weights_[i]->getW());
+      } else {
+        cpuInputGrad_ = inputGrad;
+        cpuWeight_ = weights_[i]->getW();
+      }
       preOutput_.grad->mulByBitCodeBackwardError(
-          numClasses_, *label, *weights_[i]->getW(), *inputGrad);
+          numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
+      if (useGpu_) {
+        inputGrad->copyFrom(*cpuInputGrad_);
+      } else {
+        inputGrad = cpuInputGrad_;
+      }
     }
   }
 }
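The change applies one staging pattern throughout: the bit-code operations (addByBitCode, mulByBitCode, sumByBitCode, and their backward counterparts) work on CPU matrices, so when the layer runs on GPU each operand is first copied into a host-side shadow, the kernel runs on the shadows, and any written result is copied back to the device. Below is a minimal sketch of that pattern; it uses only Matrix calls that appear in the diff above, but the helper names stageToCpu and commitToDevice are hypothetical, not part of Paddle.

// Sketch only: stageToCpu/commitToDevice are hypothetical helpers that
// condense the pattern this commit repeats for the output, bias,
// weights, inputs, and their gradients.

// Ensure cpuBuf is a CPU matrix holding the contents of src.
static void stageToCpu(MatrixPtr& cpuBuf, const MatrixPtr& src, bool useGpu) {
  if (useGpu) {
    Matrix::resizeOrCreate(cpuBuf,
                           src->getHeight(),
                           src->getWidth(),
                           /* trans */ false,
                           /* useGpu */ false);  // host allocation
    cpuBuf->copyFrom(*src);                      // device -> host copy
  } else {
    cpuBuf = src;  // already on host: alias it, no copy needed
  }
}

// After a CPU-only bit-code kernel has written into cpuBuf, push the
// result back to the (possibly GPU-resident) destination.
static void commitToDevice(const MatrixPtr& dst, const MatrixPtr& cpuBuf,
                           bool useGpu) {
  if (useGpu) {
    dst->copyFrom(*cpuBuf);  // host -> device copy
  }
  // Otherwise dst and cpuBuf alias the same matrix; nothing to do.
}

With these helpers, forward() amounts to: stage output_.value, the label, bias, weights, and inputs to CPU; run the bit-code kernels on the shadows; commit cpuOutput_ back to output_.value. When useGpu_ is false the shadows alias the originals, so the CPU path pays no extra copies.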

paddle/gserver/layers/HierarchicalSigmoidLayer.h

Lines changed: 10 additions & 0 deletions
@@ -80,6 +80,16 @@ class HierarchicalSigmoidLayer : public Layer {
   int codeLength_;
   /// temporary result of output_
   Argument preOutput_;
+
+  /// The temporary variables in CPU memory.
+  MatrixPtr cpuWeight_;
+  MatrixPtr cpuWeightGrad_;
+  MatrixPtr cpuInput_;
+  MatrixPtr cpuInputGrad_;
+  MatrixPtr cpuBias_;
+  MatrixPtr cpuOutput_;
+  IVectorPtr cpuLabel_;
+
 };

 }  // namespace paddle
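Design note: the CPU shadows are declared as members rather than locals so that Matrix::resizeOrCreate can keep reusing the same host buffers across minibatches (assuming it reallocates only when the pointer is null or the requested shape changes, as its name suggests). A condensed example from the bias path in forward():

// Runs every forward pass, but allocates host memory only on the
// first call; later calls reuse the cpuBias_ buffer.
Matrix::resizeOrCreate(cpuBias_,
                       1,
                       numClasses_ - 1,
                       /* trans */ false,
                       /* useGpu */ false);
cpuBias_->copyFrom(*biases_->getW());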
