@@ -45,25 +45,26 @@ class SoftmaxEigen {
                   const int axis_dim,
                   const phi::DenseTensor* X,
                   phi::DenseTensor* Y) {
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
-    constexpr int kAxisDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
+    constexpr int64_t kAxisDim = 1;
 
     auto logits = EigenMatrix<T>::From(*X);
     auto softmax = EigenMatrix<T>::From(*Y);
 
-    const int batch_size = logits.dimension(kBatchDim);
-    const int num_classes = logits.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = logits.dimension(kBatchDim);
+    const int64_t num_classes = logits.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_axis(kAxisDim);
-    Eigen::DSizes<int, 2> batch_classes(batch_size, num_classes);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_one_remain(batch_size, 1, num_remain);
-    Eigen::DSizes<int, 3> one_axis_one(1, axis_dim, 1);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 1> along_axis(kAxisDim);
+    Eigen::DSizes<int64_t, 2> batch_classes(batch_size, num_classes);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_one_remain(batch_size, 1, num_remain);
+    Eigen::DSizes<int64_t, 3> one_axis_one(1, axis_dim, 1);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
 
     // For numerical stability, logits should be shifted by maximum number along
     // axis, calculate shifted_logits into softmax tensor for memory reuse.
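
Reviewer note (not part of the diff): the reason for widening `int` to `int64_t`, as I understand the change, is that Eigen's own index type is `Eigen::Index` (a `std::ptrdiff_t`), so any extent larger than `INT32_MAX` silently wraps when squeezed through `int`-typed `DSizes`. A minimal sketch of the failure mode, with hypothetical sizes:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical shape: flattened class dimension larger than INT32_MAX.
  const int64_t num_classes = 2'200'000'000;  // > 2,147,483,647

  // Narrowing to int, as the old Eigen::DSizes<int, N> construction did,
  // wraps to a negative value; every reshape/broadcast built from it would
  // then be bogus. This is only an illustration, not Paddle code.
  const int narrowed = static_cast<int>(num_classes);
  std::cout << "int64_t: " << num_classes << "  int: " << narrowed << "\n";
}
```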
@@ -106,25 +107,26 @@ class SoftmaxEigen<DeviceContext, phi::float16> {
                   const int axis_dim,
                   const phi::DenseTensor* X,
                   phi::DenseTensor* Y) {
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
-    constexpr int kAxisDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
+    constexpr int64_t kAxisDim = 1;
 
     auto logits = EigenMatrix<phi::float16>::From(*X);
     auto softmax = EigenMatrix<phi::float16>::From(*Y);
 
-    const int batch_size = logits.dimension(kBatchDim);
-    const int num_classes = logits.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = logits.dimension(kBatchDim);
+    const int64_t num_classes = logits.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_axis(kAxisDim);
-    Eigen::DSizes<int, 2> batch_classes(batch_size, num_classes);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_one_remain(batch_size, 1, num_remain);
-    Eigen::DSizes<int, 3> one_axis_one(1, axis_dim, 1);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 1> along_axis(kAxisDim);
+    Eigen::DSizes<int64_t, 2> batch_classes(batch_size, num_classes);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_one_remain(batch_size, 1, num_remain);
+    Eigen::DSizes<int64_t, 3> one_axis_one(1, axis_dim, 1);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
 
     // For numerical stability, logits should be shifted by maximum number along
     // axis, calculate shifted_logits into softmax tensor for memory reuse.
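
Reviewer note: `batch_axis_remain` is how softmax over an interior axis is handled — the `[batch_size, num_classes]` matrix is reinterpreted as `[batch_size, axis_dim, num_remain]`, where `num_remain = num_classes / axis_dim`, and the max/sum reductions run along the middle dimension. A standalone sketch of the index arithmetic, assuming the row-major layout these `EigenMatrix` maps use:

```cpp
#include <cassert>
#include <cstdint>

// Illustration only: split a flat class index into (axis, remain) coordinates,
// mirroring the batch_axis_remain reshape. Row-major layout means
// class = axis * num_remain + remain.
int main() {
  const int64_t axis_dim = 3;
  const int64_t num_remain = 4;
  const int64_t num_classes = axis_dim * num_remain;  // 12
  for (int64_t c = 0; c < num_classes; ++c) {
    const int64_t axis = c / num_remain;    // coordinate the softmax reduces over
    const int64_t remain = c % num_remain;  // coordinate broadcast back over
    assert(axis * num_remain + remain == c);
  }
}
```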
@@ -164,25 +166,26 @@ class SoftmaxEigen<DeviceContext, phi::bfloat16> {
                   const int axis_dim,
                   const phi::DenseTensor* X,
                   phi::DenseTensor* Y) {
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
-    constexpr int kAxisDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
+    constexpr int64_t kAxisDim = 1;
 
     auto logits = EigenMatrix<phi::bfloat16>::From(*X);
     auto softmax = EigenMatrix<phi::bfloat16>::From(*Y);
 
-    const int batch_size = logits.dimension(kBatchDim);
-    const int num_classes = logits.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = logits.dimension(kBatchDim);
+    const int64_t num_classes = logits.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_axis(kAxisDim);
-    Eigen::DSizes<int, 2> batch_classes(batch_size, num_classes);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_one_remain(batch_size, 1, num_remain);
-    Eigen::DSizes<int, 3> one_axis_one(1, axis_dim, 1);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 1> along_axis(kAxisDim);
+    Eigen::DSizes<int64_t, 2> batch_classes(batch_size, num_classes);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_one_remain(batch_size, 1, num_remain);
+    Eigen::DSizes<int64_t, 3> one_axis_one(1, axis_dim, 1);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
 
     // For numerical stability, logits should be shifted by maximum number along
     // axis, calculate shifted_logits into softmax tensor for memory reuse.
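
Reviewer note: the "numerical stability" comment repeated in each specialization refers to the standard max-shift identity,

```latex
\operatorname{softmax}(x)_i
  = \frac{e^{x_i}}{\sum_j e^{x_j}}
  = \frac{e^{x_i - m}}{\sum_j e^{x_j - m}},
  \qquad m = \max_j x_j .
```

Subtracting the per-axis maximum scales numerator and denominator by the same factor `e^{-m}`, so the result is unchanged, while every exponent becomes at most 0 and `exp` cannot overflow — which matters especially for the `float16`/`bfloat16` specializations with their narrow dynamic ranges.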
@@ -236,18 +239,18 @@ class SoftmaxFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
                   const phi::DenseTensor* X,
                   phi::DenseTensor* Y) {
     const auto& in_dims = X->dims();
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
 
-    const int num_classes = in_dims[kClassDim];
-    const int batch_size = in_dims[kBatchDim];
-    const int num_remain = num_classes / axis_dim;
+    const int64_t num_classes = in_dims[kClassDim];
+    const int64_t batch_size = in_dims[kBatchDim];
+    const int64_t num_remain = num_classes / axis_dim;
 
     if (num_remain == 1 &&
         phi::backends::cpu::MayIUse(phi::backends::cpu::avx)) {
       const T* in_data = X->data<T>();
       T* out_data = Y->data<T>();
-      for (int bs = 0; bs < batch_size; ++bs) {
+      for (int64_t bs = 0; bs < batch_size; ++bs) {
         T max_val = *std::max_element(in_data, in_data + num_classes);
         max_val *= static_cast<T>(-1);
         vec_add_bias<T, phi::backends::cpu::avx>(
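
Reviewer note: this hunk is cut off right after `vec_add_bias`, but the shape of the fast path is visible — when `num_remain == 1` the softmax axis is the last, contiguous dimension, and `max_val` is negated so that `vec_add_bias` (an add) performs the subtraction. Assuming the elided tail follows the usual exponentiate-and-normalize sequence, a scalar outline of what this path computes:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative scalar equivalent of the AVX fast path; not the Paddle code.
void softmax_fast_path(int64_t batch_size, int64_t num_classes,
                       const float* in_data, float* out_data) {
  for (int64_t bs = 0; bs < batch_size; ++bs) {
    // Shift by the row maximum (the diff negates it so an "add bias" subtracts).
    const float max_val = *std::max_element(in_data, in_data + num_classes);
    float sum = 0.f;
    for (int64_t i = 0; i < num_classes; ++i) {
      out_data[i] = std::exp(in_data[i] - max_val);
      sum += out_data[i];
    }
    for (int64_t i = 0; i < num_classes; ++i) out_data[i] /= sum;
    in_data += num_classes;  // rows are contiguous when num_remain == 1
    out_data += num_classes;
  }
}
```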
@@ -283,18 +286,19 @@ class SoftmaxGradEigen {
     auto softmax_grad = EigenMatrix<T>::From(*y_grad);
     auto logits_grad = EigenMatrix<T>::From(*x_grad);
 
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
 
-    const int batch_size = softmax.dimension(kBatchDim);
-    const int num_classes = softmax.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = softmax.dimension(kBatchDim);
+    const int64_t num_classes = softmax.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_class(kClassDim);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 1> along_class(kClassDim);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
 
     auto dot = (softmax * softmax_grad)
                    .reshape(batch_axis_remain)
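
Reviewer note: `dot` computes the per-`(batch, remain)` inner product `sum_a y_a * dy_a`; the backward rule this class implements is `dx_i = y_i * (dy_i - sum_j y_j * dy_j)`. A scalar reference for one row, just to spell out the math the Eigen expression vectorizes:

```cpp
#include <cstddef>
#include <vector>

// Illustration only: softmax backward for a single row, dx = y * (dy - <y, dy>).
std::vector<float> softmax_grad_row(const std::vector<float>& y,
                                    const std::vector<float>& dy) {
  float dot = 0.f;
  for (std::size_t i = 0; i < y.size(); ++i) dot += y[i] * dy[i];
  std::vector<float> dx(y.size());
  for (std::size_t i = 0; i < y.size(); ++i) dx[i] = y[i] * (dy[i] - dot);
  return dx;
}
```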
@@ -318,18 +322,19 @@ class SoftmaxGradEigen<DeviceContext, phi::float16> {
     auto softmax_grad = EigenMatrix<phi::float16>::From(*y_grad);
     auto logits_grad = EigenMatrix<phi::float16>::From(*x_grad);
 
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
 
-    const int batch_size = softmax.dimension(kBatchDim);
-    const int num_classes = softmax.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = softmax.dimension(kBatchDim);
+    const int64_t num_classes = softmax.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_class(kClassDim);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 1> along_class(kClassDim);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
 
     auto dot = (softmax * softmax_grad)
                    .reshape(batch_axis_remain)
@@ -352,18 +357,19 @@ class SoftmaxGradEigen<DeviceContext, phi::bfloat16> {
     auto softmax_grad = EigenMatrix<phi::bfloat16>::From(*y_grad);
     auto logits_grad = EigenMatrix<phi::bfloat16>::From(*x_grad);
 
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
 
-    const int batch_size = softmax.dimension(kBatchDim);
-    const int num_classes = softmax.dimension(kClassDim);
-    const int num_remain = num_classes / axis_dim;
+    const int64_t batch_size = softmax.dimension(kBatchDim);
+    const int64_t num_classes = softmax.dimension(kClassDim);
+    const int64_t num_remain = num_classes / axis_dim;
 
-    Eigen::DSizes<int, 1> along_class(kClassDim);
-    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
-    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
-    Eigen::DSizes<int, 3> batch_axis_remain(batch_size, axis_dim, num_remain);
-    Eigen::DSizes<int, 2> one_axis(1, axis_dim);
+    Eigen::DSizes<int64_t, 1> along_class(kClassDim);
+    Eigen::DSizes<int64_t, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int64_t, 2> one_by_class(1, num_classes);
+    Eigen::DSizes<int64_t, 3> batch_axis_remain(
+        batch_size, axis_dim, num_remain);
+    Eigen::DSizes<int64_t, 2> one_axis(1, axis_dim);
 
     auto dot = (softmax * softmax_grad)
                    .reshape(batch_axis_remain)
@@ -393,18 +399,18 @@ class SoftmaxGradFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
                   const phi::DenseTensor* y_grad,
                   phi::DenseTensor* x_grad) {
     const auto& out_dims = y->dims();
-    constexpr int kBatchDim = 0;
-    constexpr int kClassDim = 1;
-    const int num_classes = out_dims[kClassDim];
-    const int batch_size = out_dims[kBatchDim];
-    const int num_remain = num_classes / axis_dim;
+    constexpr int64_t kBatchDim = 0;
+    constexpr int64_t kClassDim = 1;
+    const int64_t num_classes = out_dims[kClassDim];
+    const int64_t batch_size = out_dims[kBatchDim];
+    const int64_t num_remain = num_classes / axis_dim;
 
     if (num_remain == 1 &&
         phi::backends::cpu::MayIUse(phi::backends::cpu::avx)) {
       const T* out_data = y->data<T>();
       const T* out_grad = y_grad->data<T>();
       T* in_grad = x_grad->data<T>();
-      for (int bs = 0; bs < batch_size; ++bs) {
+      for (int64_t bs = 0; bs < batch_size; ++bs) {
         T scalar;
         vec_mul_reduce<T, phi::backends::cpu::avx>(
             num_classes, out_grad, out_data, &scalar);
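
Reviewer note: in this AVX path `vec_mul_reduce` produces the same `sum(y * dy)` scalar as the Eigen `dot` above, once per contiguous row. The hunk ends mid-loop; assuming the elided tail applies the usual `dx = y * (dy - scalar)` update, the loop is equivalent to this scalar outline:

```cpp
#include <cstdint>

// Illustrative scalar equivalent of the vectorized backward fast path
// (num_remain == 1, contiguous rows); not the Paddle implementation.
void softmax_grad_fast_path(int64_t batch_size, int64_t num_classes,
                            const float* out_data, const float* out_grad,
                            float* in_grad) {
  for (int64_t bs = 0; bs < batch_size; ++bs) {
    float scalar = 0.f;  // what vec_mul_reduce computes: dot(out_grad, out_data)
    for (int64_t i = 0; i < num_classes; ++i)
      scalar += out_grad[i] * out_data[i];
    for (int64_t i = 0; i < num_classes; ++i)
      in_grad[i] = out_data[i] * (out_grad[i] - scalar);
    out_data += num_classes;
    out_grad += num_classes;
    in_grad += num_classes;
  }
}
```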