@@ -18,128 +18,111 @@ namespace paddle {
 namespace operators {
 namespace math {

-template <typename T>
-class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
- public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::LoDTensor& seq, framework::Tensor* padding,
-                  bool norm_by_times) {
-    auto lod = seq.lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The LoD of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding->dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequence_length, num_sequences, sequence_width].");
-
-    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be the "
-                      "maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be the "
-                      "number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq.numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    const T* seq_data = seq.data<T>();
-    T* padding_data = padding->data<T>();
-    for (int64_t i = 0; i < max_sequence_length; ++i) {
-      for (int64_t j = 0; j < num_sequences; ++j) {
-        int64_t start_pos = abs_offset_lod[level][j];
-        int64_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
-        if (i < sequence_length) {
-          // i > 0 => sequence_length > 0
-          T scale =
-              norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-          for (int64_t k = 0; k < sequence_width; ++k) {
-            padding_data[(i * num_sequences + j) * sequence_width + k] =
-                seq_data[(start_pos + i) * sequence_width + k] * scale;
-          }
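+// Shared CPU copy routine for both directions: seq_to_padding == true copies
+// LoDTensor -> padded Tensor, false copies padded Tensor -> LoDTensor;
+// norm_by_len additionally scales every copied value by 1 / sequence_length.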
+template <typename T, PaddingLayout padding_layout>
+void CopyDataCPU(framework::LoDTensor* seq_tensor,
+                 framework::Tensor* padding_tensor,
+                 const framework::Vector<size_t>& abs_offset,
+                 const int64_t& max_seq_len, const int64_t& seq_width,
+                 bool seq_to_padding, bool norm_by_len) {
+  T* seq_data = seq_tensor->data<T>();
+  T* padding_data = padding_tensor->data<T>();
+
+  int64_t seq_num = abs_offset.size() - 1;
+
+  for (int64_t i = 0; i < seq_num; ++i) {
+    int64_t seq_start = abs_offset[i];
+    int64_t seq_len = abs_offset[i + 1] - seq_start;
+
+    T scale = norm_by_len ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
+
+    for (int64_t j = 0; j < seq_len; ++j) {
+      for (int64_t k = 0; k < seq_width; ++k) {
+        size_t padding_offset = 0;
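+        // Element (sequence i, time step j, feature k) lands at a different
+        // offset depending on the padding layout:
+        //   BATCH_LENGTH_WIDTH  -> padding is [seq_num, max_seq_len, seq_width]
+        //   LENGTH_BATCH_WIDTH  -> padding is [max_seq_len, seq_num, seq_width]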
+        if (padding_layout == BATCH_LENGTH_WIDTH) {
+          padding_offset = (i * max_seq_len * seq_width) + j * seq_width + k;
+        } else {
+          padding_offset = (j * seq_num * seq_width) + i * seq_width + k;
+        }
+        if (seq_to_padding) {
+          padding_data[padding_offset] =
+              seq_data[(seq_start + j) * seq_width + k] * scale;
         } else {
-          memset(padding_data + (i * num_sequences + j) * sequence_width, 0,
-                 sequence_width * sizeof(T));
+          seq_data[(seq_start + j) * seq_width + k] =
+              padding_data[padding_offset] * scale;
         }
       }
     }
   }
+}
+
+template <typename T, PaddingLayout padding_layout>
+class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& seq_tensor,
+                  framework::Tensor* padding_tensor,
+                  T padding_value = static_cast<T>(0),
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(seq_tensor, lod_level);
+
+    auto& lod = seq_tensor.lod();
+    // Copy the offsets out by value: ToAbsOffset() returns a temporary LoD,
+    // so a reference into it would dangle after this statement.
+    auto abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto seq_dims = seq_tensor.dims();
+    auto padding_dims = padding_tensor->dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    int64_t seq_num = abs_offset.size() - 1;
+    int64_t seq_width = seq_tensor.numel() / seq_dims[0];
+    int64_t numel = max_seq_len * seq_num * seq_width;
+
+    ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len,
+                  seq_num, seq_width, padding_layout);
+
+    T* padding_data = padding_tensor->data<T>();
+
+    // std::fill rather than memset: memset writes single bytes and is only
+    // correct when padding_value is zero.
+    std::fill(padding_data, padding_data + numel, padding_value);
+
+    CopyDataCPU<T, padding_layout>(
+        const_cast<framework::LoDTensor*>(&seq_tensor), padding_tensor,
+        abs_offset, max_seq_len, seq_width, true /* seq_to_padding */,
+        norm_by_times);
+  }
 };

-template <typename T>
-class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
+template <typename T, PaddingLayout padding_layout>
+class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  framework::LoDTensor* seq, const framework::Tensor& padding,
-                  bool norm_by_times) {
-    auto lod = seq->lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The LoD of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq->dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding.dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequnece_length, num_sequences, sequence_width].");
-
-    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be "
-                      "the maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be "
-                      "the number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq->numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    const T* padding_data = padding.data<T>();
-    T* seq_data = seq->data<T>();
-    for (int64_t i = 0; i < num_sequences; ++i) {
-      int64_t start_pos = abs_offset_lod[level][i];
-      int64_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
-      for (int64_t j = 0; j < sequence_length; ++j) {
-        // sequence_width > j > 0
-        T scale =
-            norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-        for (int64_t k = 0; k < sequence_width; ++k) {
-          seq_data[(start_pos + j) * sequence_width + k] =
-              padding_data[(j * num_sequences + i) * sequence_width + k] *
-              scale;
-        }
-      }
-    }
+                  framework::LoDTensor* seq_tensor,
+                  const framework::Tensor& padding_tensor,
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(*seq_tensor, lod_level);
+
+    auto& lod = seq_tensor->lod();
+    // As in the padding functor, copy the offsets out of the temporary LoD
+    // returned by ToAbsOffset() instead of keeping a dangling reference.
+    auto abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto& seq_dims = seq_tensor->dims();
+    auto& padding_dims = padding_tensor.dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    int64_t seq_num = abs_offset.size() - 1;
+    int64_t seq_width = seq_tensor->numel() / seq_dims[0];
+
+    ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len,
+                  seq_num, seq_width, padding_layout);
+
+    T* seq_data = seq_tensor->data<T>();
+    memset(seq_data, 0, seq_tensor->numel() * sizeof(T));
+
+    CopyDataCPU<T, padding_layout>(
+        seq_tensor, const_cast<framework::Tensor*>(&padding_tensor),
+        abs_offset, max_seq_len, seq_width, false /* seq_to_padding */,
+        norm_by_times);
   }
 };

-template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
-template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
+template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float,
+                                       LENGTH_BATCH_WIDTH>;
+template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float,
+                                         LENGTH_BATCH_WIDTH>;

 }  // namespace math
 }  // namespace operators
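
For reference, a minimal usage sketch (not part of the commit) of how the refactored CPU functors might be driven from an operator kernel. Only the functor signatures and the LENGTH_BATCH_WIDTH instantiation come from the diff above; the header path, the namespace-qualified enum values, and the surrounding function are assumptions.

// Usage sketch under the assumptions stated above.
#include "paddle/operators/math/sequence_padding.h"

namespace fw = paddle::framework;
namespace math = paddle::operators::math;
using CPUCtx = paddle::platform::CPUDeviceContext;

// Pads `seq` into a [max_seq_len, seq_num, seq_width] tensor and then
// scatters it back into `seq_out`, which must carry the same LoD as `seq`.
void RoundTrip(const CPUCtx& ctx, const fw::LoDTensor& seq,
               fw::Tensor* padding, fw::LoDTensor* seq_out) {
  math::PaddingLoDTensorFunctor<CPUCtx, float, math::LENGTH_BATCH_WIDTH>()(
      ctx, seq, padding, /* padding_value = */ 0.0f,
      /* norm_by_times = */ false, /* lod_level = */ 0);

  math::UnpaddingLoDTensorFunctor<CPUCtx, float, math::LENGTH_BATCH_WIDTH>()(
      ctx, seq_out, *padding, /* norm_by_times = */ false,
      /* lod_level = */ 0);
}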