@@ -18,37 +18,45 @@ namespace paddle {
namespace operators {
namespace math {

+ enum CopyType { kSeqToPad, kPadToSeq };
+
template <typename T>
- void CopyDataCPU(framework::LoDTensor* seq_tensor,
-                  framework::Tensor* pad_tensor,
-                  const framework::Vector<size_t>& seq_offset,
-                  const int64_t& max_seq_len, const int64_t& seq_width,
-                  bool seq_to_pad, bool norm_by_len,
-                  OutputLayout output_layout) {
-   T* seq_data = seq_tensor->data<T>();
-   T* pad_data = pad_tensor->data<T>();
-
-   int64_t seq_num = seq_offset.size() - 1;
-
-   for (int64_t i = 0; i < seq_num; ++i) {
-     int64_t seq_start = seq_offset[i];
-     int64_t seq_len = seq_offset[i + 1] - seq_start;
-     T scale = norm_by_len ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
-     for (int64_t j = 0; j < seq_len; ++j) {
-       for (int64_t k = 0; k < seq_width; ++k) {
-         size_t pad_data_idx = 0;
-         size_t seq_data_idx = (seq_start + j) * seq_width + k;
-         if (output_layout == kBatchLengthWidth) {
-           pad_data_idx = (i * max_seq_len + j) * seq_width + k;
-         } else {
-           pad_data_idx = (j * seq_num + i) * seq_width + k;
-         }
-         if (seq_to_pad) {
-           pad_data[pad_data_idx] = seq_data[seq_data_idx] * scale;
-         } else {
-           seq_data[seq_data_idx] = pad_data[pad_data_idx] * scale;
+ void CopyValidData(framework::Tensor* dst_tensor,
+                    const framework::Tensor* src_tensor,
+                    const framework::Vector<size_t>& seq_offsets,
+                    int pad_seq_len, int step_width, bool norm_by_len,
+                    CopyType type, PadLayout layout) {
+   int seq_num = seq_offsets.size() - 1;
+   const T* src_data = src_tensor->data<T>();
+   T* dst_data = dst_tensor->data<T>();
+
+   int seq_cpy_gap = step_width;
+   int pad_cpy_gap =
+       layout == kBatchLengthWidth ? step_width : seq_num * step_width;
+   for (int seq_idx = 0; seq_idx < seq_num; ++seq_idx) {
+     int valid_seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx];
+     PADDLE_ENFORCE_GE(
+         pad_seq_len, valid_seq_len,
+         "The padded sequence length can not be less than its original length.");
+     int seq_data_offset = seq_offsets[seq_idx] * step_width;
+     int pad_data_offset = layout == kBatchLengthWidth
+                               ? seq_idx * pad_seq_len * step_width
+                               : seq_idx * step_width;
+     float scale = 1.0f / static_cast<float>(valid_seq_len);
+
+     for (int step_idx = 0; step_idx < valid_seq_len; ++step_idx) {
+       const T* src =
+           src_data + (type == kSeqToPad ? seq_data_offset : pad_data_offset);
+       T* dst =
+           dst_data + (type == kSeqToPad ? pad_data_offset : seq_data_offset);
+       memcpy(dst, src, step_width * sizeof(T));
+       if (norm_by_len) {
+         for (int i = 0; i < step_width; ++i) {
+           *(dst + i) *= scale;
        }
      }
+       seq_data_offset += seq_cpy_gap;
+       pad_data_offset += pad_cpy_gap;
    }
  }
}
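
For reference, the inner loop above copies one step_width-wide step per memcpy and then advances both offsets by a fixed gap: the unpadded side always moves by step_width, while the padded side moves by step_width for the batch-major kBatchLengthWidth layout or by seq_num * step_width otherwise. The following is a minimal standalone sketch of the kSeqToPad direction for the batch-major case only; it is not code from this patch, uses plain std::vector in place of framework::Tensor, and the helper name SeqToPadBatchMajor is hypothetical:

#include <cstdio>
#include <cstring>
#include <vector>

// Illustrates the kSeqToPad copy for the batch-major (kBatchLengthWidth)
// layout: pad has shape [seq_num, pad_seq_len, step_width].
void SeqToPadBatchMajor(const std::vector<float>& seq,           // packed valid data
                        const std::vector<size_t>& seq_offsets,  // LoD offsets
                        int pad_seq_len, int step_width,
                        std::vector<float>* pad) {
  int seq_num = static_cast<int>(seq_offsets.size()) - 1;
  for (int seq_idx = 0; seq_idx < seq_num; ++seq_idx) {
    int valid_seq_len =
        static_cast<int>(seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]);
    int seq_data_offset = static_cast<int>(seq_offsets[seq_idx]) * step_width;
    int pad_data_offset = seq_idx * pad_seq_len * step_width;
    for (int step_idx = 0; step_idx < valid_seq_len; ++step_idx) {
      std::memcpy(pad->data() + pad_data_offset, seq.data() + seq_data_offset,
                  step_width * sizeof(float));
      seq_data_offset += step_width;  // seq_cpy_gap
      pad_data_offset += step_width;  // pad_cpy_gap (batch-major)
    }
  }
}

int main() {
  // Two sequences of lengths 2 and 3, step_width = 2, padded to length 3.
  std::vector<float> seq = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5};
  std::vector<size_t> offsets = {0, 2, 5};
  std::vector<float> pad(2 * 3 * 2, 0.f);  // pre-filled with the pad value
  SeqToPadBatchMajor(seq, offsets, /*pad_seq_len=*/3, /*step_width=*/2, &pad);
  for (float v : pad) std::printf("%g ", v);  // 1 1 2 2 0 0 3 3 4 4 5 5
  std::printf("\n");
  return 0;
}
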
@@ -58,62 +66,61 @@ class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& seq_tensor,
-                  framework::Tensor* pad_tensor,
-                  T pad_value = static_cast<T>(0), bool norm_by_times = false,
-                  size_t lod_level = 0,
-                  OutputLayout output_layout = kBatchLengthWidth) {
-     CheckLoD(seq_tensor, lod_level);
-
-     auto& lod = seq_tensor.lod();
-     auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
-
+                  framework::LoDTensor* pad_tensor,
+                  std::vector<T> pad_value = {0}, int pad_seq_len = -1,
+                  int lod_level = 0, bool norm_by_times = false,
+                  const PadLayout layout = kBatchLengthWidth) {
+     auto seq_offsets = framework::ToAbsOffset(seq_tensor.lod())[lod_level];
    auto seq_tensor_dims = seq_tensor.dims();
    auto pad_tensor_dims = pad_tensor->dims();
-     int64_t max_seq_len = MaximumSequenceLength(seq_offset);
-     int64_t seq_num = seq_offset.size() - 1;
-     int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0];
+     if (pad_seq_len == -1) {
+       pad_seq_len = MaximumSequenceLength(seq_offsets);
+     }
+     int step_width = seq_tensor.numel() / seq_tensor_dims[0];

-     CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
-               seq_num, seq_width, output_layout);
+     CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
+               step_width, layout);
+     PADDLE_ENFORCE(pad_value.size() == 1 ||
+                        static_cast<int>(pad_value.size()) == step_width,
+                    "The size of 'pad_value' can only be 1 or be equal to the "
+                    "'step_width'.");

-     T* pad_data = pad_tensor->data<T>();
+     if (pad_value.size() == 1) {
+       pad_value = std::vector<T>(step_width, pad_value[0]);
+     }

-     memset(pad_data, pad_value, max_seq_len * seq_num * seq_width * sizeof(T));
+     // fill every step of the padded tensor with the padding value first
+     T* pad_data = pad_tensor->data<T>();
+     for (int i = 0; i < pad_tensor->numel() / step_width; ++i) {
+       memcpy(pad_data + i * step_width, pad_value.data(),
+              step_width * sizeof(T));
+     }

-     CopyDataCPU<T>(const_cast<framework::LoDTensor*>(&seq_tensor), pad_tensor,
-                    seq_offset, max_seq_len, seq_width, true /* seq_to_pad */,
-                    norm_by_times, output_layout);
+     CopyValidData<T>(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len,
+                      step_width, norm_by_times, kSeqToPad, layout);
  }
};
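
The fill step above broadcasts a pad_value of size 1 to step_width entries and then stamps that row into every step of the padded tensor before the valid data is copied in. A minimal standalone sketch of just that behavior, with plain std::vector standing in for framework::LoDTensor (not patch code):

#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const int seq_num = 2, pad_seq_len = 3, step_width = 2;
  // A scalar pad value is broadcast to one entry per column (step_width).
  std::vector<float> pad_value = {-1.f};
  if (pad_value.size() == 1) {
    pad_value = std::vector<float>(step_width, pad_value[0]);
  }
  // Stamp the pad_value row into every step of the padded buffer.
  std::vector<float> pad(seq_num * pad_seq_len * step_width);
  float* pad_data = pad.data();
  for (int i = 0; i < static_cast<int>(pad.size()) / step_width; ++i) {
    std::memcpy(pad_data + i * step_width, pad_value.data(),
                step_width * sizeof(float));
  }
  for (float v : pad) std::printf("%g ", v);  // prints -1 twelve times
  std::printf("\n");
  return 0;
}
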
template <typename T>
class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
-                  framework::LoDTensor* seq_tensor,
-                  const framework::Tensor& pad_tensor,
-                  bool norm_by_times = false, size_t lod_level = 0,
-                  OutputLayout output_layout = kBatchLengthWidth) {
-     CheckLoD(*seq_tensor, lod_level);
-
-     auto& lod = seq_tensor->lod();
-     auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
-
-     auto& seq_tensor_dims = seq_tensor->dims();
-     auto& pad_tensor_dims = pad_tensor.dims();
-     int64_t max_seq_len = MaximumSequenceLength(seq_offset);
-     int64_t seq_num = seq_offset.size() - 1;
-     int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0];
-
-     CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
-               seq_num, seq_width, output_layout);
-
-     T* seq_data = seq_tensor->data<T>();
-     memset(seq_data, static_cast<T>(0), seq_tensor->numel() * sizeof(T));
-
-     CopyDataCPU<T>(seq_tensor, const_cast<framework::Tensor*>(&pad_tensor),
-                    seq_offset, max_seq_len, seq_width, false /* seq_to_pad */,
-                    norm_by_times, output_layout);
+                  const framework::LoDTensor& pad_tensor,
+                  framework::LoDTensor* seq_tensor, int pad_seq_len = -1,
+                  int lod_level = 0, bool norm_by_times = false,
+                  const PadLayout& layout = kBatchLengthWidth) {
+     auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level];
+     auto seq_tensor_dims = seq_tensor->dims();
+     auto pad_tensor_dims = pad_tensor.dims();
+     if (pad_seq_len == -1) {
+       pad_seq_len = MaximumSequenceLength(seq_offsets);
+     }
+     int step_width = seq_tensor->numel() / seq_tensor_dims[0];
+
+     CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
+               step_width, layout);
+
+     CopyValidData<T>(seq_tensor, &pad_tensor, seq_offsets, pad_seq_len,
+                      step_width, norm_by_times, kPadToSeq, layout);
  }
};
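
UnpaddingLoDTensorFunctor drives CopyValidData in the opposite direction (kPadToSeq): each valid step is copied out of the padded buffer back into the packed sequence tensor and scaled by 1/valid_seq_len when norm_by_times is set. A minimal standalone sketch of that path for the batch-major layout, using a hypothetical helper PadToSeqBatchMajor on plain arrays rather than the real functor:

#include <cstdio>
#include <cstring>
#include <vector>

// Mirror of the kPadToSeq copy for the batch-major layout:
// pad has shape [seq_num, pad_seq_len, step_width], seq holds only valid steps.
void PadToSeqBatchMajor(const std::vector<float>& pad,
                        const std::vector<size_t>& seq_offsets,
                        int pad_seq_len, int step_width, bool norm_by_len,
                        std::vector<float>* seq) {
  int seq_num = static_cast<int>(seq_offsets.size()) - 1;
  for (int seq_idx = 0; seq_idx < seq_num; ++seq_idx) {
    int valid_seq_len =
        static_cast<int>(seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]);
    int seq_data_offset = static_cast<int>(seq_offsets[seq_idx]) * step_width;
    int pad_data_offset = seq_idx * pad_seq_len * step_width;
    float scale = 1.0f / static_cast<float>(valid_seq_len);
    for (int step_idx = 0; step_idx < valid_seq_len; ++step_idx) {
      float* dst = seq->data() + seq_data_offset;
      std::memcpy(dst, pad.data() + pad_data_offset, step_width * sizeof(float));
      if (norm_by_len) {
        for (int i = 0; i < step_width; ++i) dst[i] *= scale;
      }
      seq_data_offset += step_width;
      pad_data_offset += step_width;
    }
  }
}

int main() {
  // Padded buffer for two sequences (lengths 2 and 3) from the previous sketch.
  std::vector<float> pad = {1, 1, 2, 2, 0, 0, 3, 3, 4, 4, 5, 5};
  std::vector<size_t> offsets = {0, 2, 5};
  std::vector<float> seq(5 * 2, 0.f);
  PadToSeqBatchMajor(pad, offsets, /*pad_seq_len=*/3, /*step_width=*/2,
                     /*norm_by_len=*/true, &seq);
  // 0.5 0.5 1 1 1 1 1.33333 1.33333 1.66667 1.66667
  for (float v : seq) std::printf("%g ", v);
  std::printf("\n");
  return 0;
}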