@@ -64,49 +64,113 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
64
64
batchSize,
65
65
codeLength_,
66
66
/* trans */ false ,
67
- useGpu (deviceId_) );
67
+ false );
68
68
Matrix::resizeOrCreate (preOutput_.grad ,
69
69
batchSize,
70
70
codeLength_,
71
71
/* trans */ false ,
72
- useGpu (deviceId_));
73
-
72
+ false );
74
73
IVectorPtr label = getInput (*getLabelLayer ()).ids ;
75
-
76
74
preOutput_.value ->zeroMem ();
77
75
76
+ if (useGpu_) {
77
+ Matrix::resizeOrCreate (cpuOutput_,
78
+ output_.value ->getHeight (),
79
+ output_.value ->getWidth (),
80
+ /* trans */ false ,
81
+ false );
82
+ IVector::resizeOrCreate (cpuLabel_, label->getSize (), false );
83
+ cpuLabel_->copyFrom (*label);
84
+ cpuOutput_->copyFrom (*output_.value );
85
+ } else {
86
+ cpuOutput_ = output_.value ;
87
+ cpuLabel_ = label;
88
+ }
78
89
/* add the bias-vector */
79
90
if (biases_.get () != NULL ) {
80
- preOutput_.value ->addByBitCode (numClasses_, *label, *biases_->getW ());
91
+ if (useGpu_) {
92
+ Matrix::resizeOrCreate (cpuBias_,
93
+ 1 ,
94
+ numClasses_ - 1 ,
95
+ /* trans */ false ,
96
+ false );
97
+ cpuBias_->copyFrom (*biases_->getW ());
98
+ } else {
99
+ cpuBias_ = biases_->getW ();
100
+ }
101
+ preOutput_.value ->addByBitCode (numClasses_, *cpuLabel_, *cpuBias_);
81
102
}
82
103
for (size_t i = 0 ; i < inputLayers_.size () - 1 ; ++i) {
83
104
MatrixPtr input = getInputValue (i);
105
+ if (useGpu_) {
106
+ Matrix::resizeOrCreate (cpuInput_,
107
+ input->getHeight (),
108
+ input->getWidth (),
109
+ /* trans */ false ,
110
+ false );
111
+ Matrix::resizeOrCreate (cpuWeight_,
112
+ weights_[i]->getW ()->getHeight (),
113
+ weights_[i]->getW ()->getWidth (),
114
+ /* trans */ false ,
115
+ false );
116
+ cpuInput_->copyFrom (*input);
117
+ cpuWeight_->copyFrom (*weights_[i]->getW ());
118
+ } else {
119
+ cpuInput_ = input;
120
+ cpuWeight_ = weights_[i]->getW ();
121
+ }
84
122
preOutput_.value ->mulByBitCode (
85
- numClasses_, *label , *weights_[i]-> getW () , *input );
123
+ numClasses_, *cpuLabel_ , *cpuWeight_ , *cpuInput_ );
86
124
}
87
125
// keep consistent with the clipping in the following softrelu
88
126
preOutput_.value ->clip (-40.0 , 40.0 );
89
127
preOutput_.value ->sumByBitCode (numClasses_,
90
- *label ,
91
- *output_. value ,
128
+ *cpuLabel_ ,
129
+ *cpuOutput_ ,
92
130
-1 ); // scaleSum
93
131
preOutput_.value ->softrelu (*preOutput_.value );
94
132
MatrixPtr sum =
95
- Matrix::create (batchSize, 1 , /* trans= */ false , useGpu (deviceId_) );
133
+ Matrix::create (batchSize, 1 , /* trans= */ false , false );
96
134
preOutput_.value ->rowSum (*sum);
97
- output_.value ->add (*sum);
135
+ cpuOutput_->add (*sum);
136
+ if (useGpu_) {
137
+ output_.value ->copyFrom (*cpuOutput_);
138
+ } else {
139
+ output_.value = cpuOutput_;
140
+ }
98
141
}
99
142
100
143
void HierarchicalSigmoidLayer::backward (const UpdateCallback& callback) {
101
144
IVectorPtr label = getInput (*getLabelLayer ()).ids ;
145
+ if (useGpu_) {
146
+ IVector::resizeOrCreate (cpuLabel_, label->getSize (), false );
147
+ cpuLabel_->copyFrom (*label);
148
+ } else {
149
+ cpuLabel_ = label;
150
+ }
102
151
preOutput_.grad ->one ();
103
152
preOutput_.grad ->softreluDerivative (*preOutput_.value );
104
- preOutput_.grad ->subByBitCode (numClasses_, *label );
153
+ preOutput_.grad ->subByBitCode (numClasses_, *cpuLabel_ );
105
154
106
155
if (biases_ && biases_->getWGrad ()) {
156
+ MatrixPtr biases_grad = biases_->getWGrad ();
157
+ if (useGpu_) {
158
+ Matrix::resizeOrCreate (cpuBias_,
159
+ 1 ,
160
+ numClasses_ - 1 ,
161
+ /* trans */ false ,
162
+ false );
163
+ cpuBias_->copyFrom (*biases_grad);
164
+ } else {
165
+ cpuBias_ = biases_grad;
166
+ }
107
167
preOutput_.grad ->addByBitCodeBackward (
108
- numClasses_, *label, *biases_->getWGrad ());
109
-
168
+ numClasses_, *cpuLabel_, *cpuBias_);
169
+ if (useGpu_) {
170
+ biases_grad->copyFrom (*cpuBias_);
171
+ } else {
172
+ biases_grad = cpuBias_;
173
+ }
110
174
/* Increasing the number of gradient */
111
175
biases_->getParameterPtr ()->incUpdate (callback);
112
176
}
@@ -115,18 +179,62 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
115
179
/* Calculate the W-gradient for the current layer */
116
180
MatrixPtr input = getInputValue (i);
117
181
if (weights_[i]->getWGrad ()) {
182
+ MatrixPtr weights_grad = weights_[i]->getWGrad ();
183
+ if (useGpu_) {
184
+ Matrix::resizeOrCreate (cpuInput_,
185
+ input->getHeight (),
186
+ input->getWidth (),
187
+ /* trans */ false ,
188
+ false );
189
+ Matrix::resizeOrCreate (cpuWeightGrad_,
190
+ weights_grad->getHeight (),
191
+ weights_grad->getWidth (),
192
+ /* trans */ false ,
193
+ false );
194
+ cpuInput_->copyFrom (*input);
195
+ cpuWeightGrad_->copyFrom (*weights_grad);
196
+ } else {
197
+ cpuInput_ = input;
198
+ cpuWeightGrad_ = weights_grad;
199
+ }
118
200
preOutput_.grad ->mulByBitCodeBackwardWeight (
119
- numClasses_, *label, *weights_[i]->getWGrad (), *input);
120
-
201
+ numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
202
+ if (useGpu_) {
203
+ weights_grad->copyFrom (*cpuWeightGrad_);
204
+ } else {
205
+ weights_grad = cpuWeightGrad_;
206
+ }
121
207
/* Increasing the number of gradient */
122
208
weights_[i]->getParameterPtr ()->incUpdate (callback);
123
209
}
124
210
125
211
/* Calculate the input layers error */
126
212
MatrixPtr inputGrad = getInputGrad (i);
127
213
if (inputGrad) {
214
+ if (useGpu_) {
215
+ Matrix::resizeOrCreate (cpuInputGrad_,
216
+ inputGrad->getHeight (),
217
+ inputGrad->getWidth (),
218
+ /* trans */ false ,
219
+ false );
220
+ Matrix::resizeOrCreate (cpuWeight_,
221
+ weights_[i]->getW ()->getHeight (),
222
+ weights_[i]->getW ()->getWidth (),
223
+ /* trans */ false ,
224
+ false );
225
+ cpuInputGrad_->copyFrom (*inputGrad);
226
+ cpuWeight_->copyFrom (*weights_[i]->getW ());
227
+ } else {
228
+ cpuInputGrad_ = inputGrad;
229
+ cpuWeight_ = weights_[i]->getW ();
230
+ }
128
231
preOutput_.grad ->mulByBitCodeBackwardError (
129
- numClasses_, *label, *weights_[i]->getW (), *inputGrad);
232
+ numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
233
+ if (useGpu_) {
234
+ inputGrad->copyFrom (*cpuInputGrad_);
235
+ } else {
236
+ inputGrad = cpuInputGrad_;
237
+ }
130
238
}
131
239
}
132
240
}
0 commit comments