@@ -64,49 +64,111 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
64
64
batchSize,
65
65
codeLength_,
66
66
/* trans */ false ,
67
- useGpu (deviceId_) );
67
+ false );
68
68
Matrix::resizeOrCreate (preOutput_.grad ,
69
69
batchSize,
70
70
codeLength_,
71
71
/* trans */ false ,
72
- useGpu (deviceId_));
73
-
72
+ false );
74
73
IVectorPtr label = getInput (*getLabelLayer ()).ids ;
75
-
76
74
preOutput_.value ->zeroMem ();
77
75
76
+ if (useGpu_) {
77
+ Matrix::resizeOrCreate (cpuOutput_,
78
+ output_.value ->getHeight (),
79
+ output_.value ->getWidth (),
80
+ /* trans */ false ,
81
+ false );
82
+ IVector::resizeOrCreate (cpuLabel_, label->getSize (), false );
83
+ cpuLabel_->copyFrom (*label);
84
+ cpuOutput_->copyFrom (*output_.value );
85
+ } else {
86
+ cpuOutput_ = output_.value ;
87
+ cpuLabel_ = label;
88
+ }
78
89
/* add the bias-vector */
79
90
if (biases_.get () != NULL ) {
80
- preOutput_.value ->addByBitCode (numClasses_, *label, *biases_->getW ());
91
+ if (useGpu_) {
92
+ Matrix::resizeOrCreate (cpuBias_,
93
+ 1 ,
94
+ numClasses_ - 1 ,
95
+ /* trans */ false ,
96
+ false );
97
+ cpuBias_->copyFrom (*biases_->getW ());
98
+ } else {
99
+ cpuBias_ = biases_->getW ();
100
+ }
101
+ preOutput_.value ->addByBitCode (numClasses_, *cpuLabel_, *cpuBias_);
81
102
}
82
103
for (size_t i = 0 ; i < inputLayers_.size () - 1 ; ++i) {
83
104
MatrixPtr input = getInputValue (i);
105
+ if (useGpu_) {
106
+ Matrix::resizeOrCreate (cpuInput_,
107
+ input->getHeight (),
108
+ input->getWidth (),
109
+ /* trans */ false ,
110
+ false );
111
+ Matrix::resizeOrCreate (cpuWeight_,
112
+ weights_[i]->getW ()->getHeight (),
113
+ weights_[i]->getW ()->getWidth (),
114
+ /* trans */ false ,
115
+ false );
116
+ cpuInput_->copyFrom (*input);
117
+ cpuWeight_->copyFrom (*weights_[i]->getW ());
118
+ } else {
119
+ cpuInput_ = input;
120
+ cpuWeight_ = weights_[i]->getW ();
121
+ }
84
122
preOutput_.value ->mulByBitCode (
85
- numClasses_, *label , *weights_[i]-> getW () , *input );
123
+ numClasses_, *cpuLabel_ , *cpuWeight_ , *cpuInput_ );
86
124
}
87
125
// keep consistent with the clipping in the following softrelu
88
126
preOutput_.value ->clip (-40.0 , 40.0 );
89
127
preOutput_.value ->sumByBitCode (numClasses_,
90
- *label ,
91
- *output_. value ,
128
+ *cpuLabel_ ,
129
+ *cpuOutput_ ,
92
130
-1 ); // scaleSum
93
131
preOutput_.value ->softrelu (*preOutput_.value );
94
- MatrixPtr sum =
95
- Matrix::create (batchSize, 1 , /* trans= */ false , useGpu (deviceId_));
132
+ MatrixPtr sum = Matrix::create (batchSize, 1 , /* trans= */ false , false );
96
133
preOutput_.value ->rowSum (*sum);
97
- output_.value ->add (*sum);
134
+ cpuOutput_->add (*sum);
135
+ if (useGpu_) {
136
+ output_.value ->copyFrom (*cpuOutput_);
137
+ } else {
138
+ output_.value = cpuOutput_;
139
+ }
98
140
}
99
141
100
142
void HierarchicalSigmoidLayer::backward (const UpdateCallback& callback) {
101
143
IVectorPtr label = getInput (*getLabelLayer ()).ids ;
144
+ if (useGpu_) {
145
+ IVector::resizeOrCreate (cpuLabel_, label->getSize (), false );
146
+ cpuLabel_->copyFrom (*label);
147
+ } else {
148
+ cpuLabel_ = label;
149
+ }
102
150
preOutput_.grad ->one ();
103
151
preOutput_.grad ->softreluDerivative (*preOutput_.value );
104
- preOutput_.grad ->subByBitCode (numClasses_, *label );
152
+ preOutput_.grad ->subByBitCode (numClasses_, *cpuLabel_ );
105
153
106
154
/* Accumulate the bias gradient.
 * In GPU mode the gradient is staged through cpuBias_, a buffer created with
 * the use-GPU argument set to false, and copied back after the bit-code
 * kernel has run. In CPU mode cpuBias_ simply aliases the parameter gradient,
 * so the kernel updates it in place. */
if (biases_ && biases_->getWGrad()) {
  MatrixPtr biases_grad = biases_->getWGrad();
  if (useGpu_) {
    Matrix::resizeOrCreate(cpuBias_,
                           1,
                           numClasses_ - 1,
                           /* trans */ false,
                           false);
    cpuBias_->copyFrom(*biases_grad);
  } else {
    cpuBias_ = biases_grad;
  }
  preOutput_.grad->addByBitCodeBackward(numClasses_, *cpuLabel_, *cpuBias_);
  /* Test the member flag useGpu_ here. The bare name useGpu is invoked as
   * useGpu(deviceId_) elsewhere in this file, so `if (useGpu)` would test the
   * callable itself rather than the layer's mode. No else branch is needed:
   * in CPU mode cpuBias_ already is biases_grad. */
  if (useGpu_) {
    biases_grad->copyFrom(*cpuBias_);
  }
  /* Increasing the number of gradient */
  biases_->getParameterPtr()->incUpdate(callback);
}
@@ -115,18 +177,62 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
115
177
/* Calculate the W-gradient for the current layer */
116
178
MatrixPtr input = getInputValue (i);
117
179
/* Accumulate the weight gradient for input i.
 * In GPU mode the input and the weight gradient are copied into staging
 * buffers created with the use-GPU argument set to false, the bit-code kernel
 * updates the staged gradient, and the result is copied back. In CPU mode the
 * staging pointers alias the real matrices, so the update is in place. */
if (weights_[i]->getWGrad()) {
  MatrixPtr weights_grad = weights_[i]->getWGrad();
  if (useGpu_) {
    Matrix::resizeOrCreate(cpuInput_,
                           input->getHeight(),
                           input->getWidth(),
                           /* trans */ false,
                           false);
    Matrix::resizeOrCreate(cpuWeightGrad_,
                           weights_grad->getHeight(),
                           weights_grad->getWidth(),
                           /* trans */ false,
                           false);
    cpuInput_->copyFrom(*input);
    cpuWeightGrad_->copyFrom(*weights_grad);
  } else {
    cpuInput_ = input;
    cpuWeightGrad_ = weights_grad;
  }
  preOutput_.grad->mulByBitCodeBackwardWeight(
      numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
  /* Only the GPU path needs a copy-back; in CPU mode cpuWeightGrad_ is the
   * same matrix as weights_grad, so re-assigning the local would be a no-op. */
  if (useGpu_) {
    weights_grad->copyFrom(*cpuWeightGrad_);
  }
  /* Increasing the number of gradient */
  weights_[i]->getParameterPtr()->incUpdate(callback);
}
124
208
125
209
/* Calculate the input layers error */
126
210
MatrixPtr inputGrad = getInputGrad (i);
127
211
/* Propagate the error to input i, if that input has a gradient matrix.
 * In GPU mode the input gradient and the weights are copied into staging
 * buffers created with the use-GPU argument set to false, the bit-code kernel
 * accumulates into the staged gradient, and the result is copied back. In CPU
 * mode the staging pointers alias the real matrices. */
if (inputGrad) {
  if (useGpu_) {
    Matrix::resizeOrCreate(cpuInputGrad_,
                           inputGrad->getHeight(),
                           inputGrad->getWidth(),
                           /* trans */ false,
                           false);
    Matrix::resizeOrCreate(cpuWeight_,
                           weights_[i]->getW()->getHeight(),
                           weights_[i]->getW()->getWidth(),
                           /* trans */ false,
                           false);
    cpuInputGrad_->copyFrom(*inputGrad);
    cpuWeight_->copyFrom(*weights_[i]->getW());
  } else {
    cpuInputGrad_ = inputGrad;
    cpuWeight_ = weights_[i]->getW();
  }
  preOutput_.grad->mulByBitCodeBackwardError(
      numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
  /* Only the GPU path needs a copy-back; in CPU mode cpuInputGrad_ is the
   * same matrix as inputGrad, so re-assigning the local would be a no-op. */
  if (useGpu_) {
    inputGrad->copyFrom(*cpuInputGrad_);
  }
}
131
237
}
132
238
}
0 commit comments