@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
  limitations under the License. */

  #pragma once
- #include "paddle/framework/eigen.h"
  #include "paddle/framework/op_registry.h"
+ #include "paddle/operators/math/math_function.h"
  #include "paddle/operators/strided_memcpy.h"

  namespace paddle {
@@ -25,109 +25,124 @@ using LoDTensor = framework::LoDTensor;
  using LoD = framework::LoD;

  template <typename T>
- LoD subsequenceLoD(const T* in, const std::vector<int> offsets,
-                    const std::vector<int> sizes) {
-   auto out_lod = in->lod();
+ LoD SequenceSliceLoD(const T& in, const int64_t* offset_data,
+                      const int64_t* length_data) {
+   auto out_lod = in.lod();
    size_t lod_offset = 0;

-   auto n = in->lod()[0].size() - 1;
+   auto n = in.lod()[0].size() - 1;
    out_lod[0][0] = 0;
    for (size_t i = 0; i < n; ++i) {
-     lod_offset += sizes[i];
+     lod_offset += length_data[i];
      out_lod[0][i + 1] = lod_offset;
    }
    return out_lod;
  }

  template <typename Place, typename T>
- class SubSequenceOpKernel : public framework::OpKernel<T> {
+ class SequenceSliceOpKernel : public framework::OpKernel<T> {
   public:
    void Compute(const framework::ExecutionContext& ctx) const override {
      auto* in = ctx.Input<LoDTensor>("X");
-     std::vector<int> offsets = ctx.Attr<std::vector<int>>("offset");
-     std::vector<int> sizes = ctx.Attr<std::vector<int>>("size");
+     auto* offset = ctx.Input<Tensor>("Offset");
+     auto* length = ctx.Input<Tensor>("Length");
      auto* out = ctx.Output<LoDTensor>("Out");

-     auto offset_len = offsets.size();
-     auto size_len = sizes.size();
+     const int64_t* offset_data = offset->data<int64_t>();
+     const int64_t* length_data = length->data<int64_t>();
+
+     if (platform::is_gpu_place(ctx.GetPlace())) {
+       framework::Tensor offset_cpu;
+       offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
+       offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
+       offset_data = offset_cpu.data<int64_t>();
+
+       framework::Tensor length_cpu;
+       length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
+       length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
+       length_data = length_cpu.data<int64_t>();
+     }

      auto lod = in->lod();
      auto n = lod[0].size() - 1;

      PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-     PADDLE_ENFORCE_EQ(n, offset_len,
-                       "The length of input and offset should be the same")
-     PADDLE_ENFORCE_EQ(n, size_len,
-                       "The length of input and size should be the same")
+     PADDLE_ENFORCE_EQ(offset->dims().size(), 1UL,
+                       "Only support one level sequence now.");
+     PADDLE_ENFORCE_EQ(length->dims().size(), 1UL,
+                       "Only support one level sequence now.");
+     PADDLE_ENFORCE_EQ(
+         n, length->dims()[0],
+         "The size of input-sequence and length-array should be the same")
+     PADDLE_ENFORCE_EQ(
+         n, offset->dims()[0],
+         "The size of input-sequence and offset-array should be the same")

      for (size_t i = 0; i < n; ++i) {
-       auto offset = offsets[i];
-       auto size = sizes[i];
-       PADDLE_ENFORCE_LT(lod[0][i] + offset + size, lod[0][i + 1],
-                         "The target tensor's length overflow")
+       PADDLE_ENFORCE_LT(0, offset_data[i], "The offset must be greater than zero")
+       PADDLE_ENFORCE_LT(0, length_data[i], "The length must be greater than zero")
+       PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i],
+                         lod[0][i + 1], "The target tensor's length overflow")
      }

      out->mutable_data<T>(ctx.GetPlace());
-     auto out_lod = subsequenceLoD(in, offsets, sizes);
+     auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
      out->set_lod(out_lod);
+     math::SetConstant<Place, T> set_zero;
+     set_zero(ctx.device_context(), out, static_cast<T>(0));

      auto in_stride = framework::stride(in->dims());
      auto out_stride = framework::stride(out->dims());

      size_t out_offset = 0;
      for (size_t i = 0; i < n; ++i) {
-       auto offset = offsets[i];
-       auto size = sizes[i];
-
-       Tensor in_t = in->Slice(static_cast<int>(lod[0][i] + offset),
-                               static_cast<int>(lod[0][i] + offset + size));
+       Tensor in_t =
+           in->Slice(static_cast<int>(lod[0][i] + offset_data[i]),
+                     static_cast<int>(lod[0][i] + offset_data[i] +
+                                      length_data[i]));

        StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(),
                         in_stride, in_t.dims(), out_stride,
                         out->data<T>() + out_offset);
-       out_offset += size * in_stride[0];
+       out_offset += length_data[i] * in_stride[0];
      }
    }
  };

  template <typename Place, typename T>
- class SubSequenceGradOpKernel : public framework::OpKernel<T> {
+ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
   public:
    void Compute(const framework::ExecutionContext& ctx) const override {
      auto* in = ctx.Input<LoDTensor>("X");
-     std::vector<int> offsets = ctx.Attr<std::vector<int>>("offset");
-     std::vector<int> sizes = ctx.Attr<std::vector<int>>("size");
+     auto* offset = ctx.Input<Tensor>("Offset");
+     auto* length = ctx.Input<Tensor>("Length");
      auto* out_grad =
          ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
      auto* x_grad =
          ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));

-     auto offset_len = offsets.size();
-     auto size_len = sizes.size();
+     const int64_t* offset_data = offset->data<int64_t>();
+     const int64_t* length_data = length->data<int64_t>();

-     auto lod = in->lod();
-     auto n = lod[0].size() - 1;
+     if (platform::is_gpu_place(ctx.GetPlace())) {
+       framework::Tensor offset_cpu;
+       offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
+       offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
+       offset_data = offset_cpu.data<int64_t>();

-     // check input data format
-     PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-     PADDLE_ENFORCE_EQ(n, offset_len,
-                       "The length of input and offset should be the same")
-     PADDLE_ENFORCE_EQ(n, size_len,
-                       "The length of input and size should be the same")
-
-     for (size_t i = 0; i < n; ++i) {
-       auto offset = offsets[i];
-       auto size = sizes[i];
-       PADDLE_ENFORCE_LT(lod[0][i] + offset + size, lod[0][i + 1],
-                         "The target tensor's length overflow")
+       framework::Tensor length_cpu;
+       length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
+       length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
+       length_data = length_cpu.data<int64_t>();
      }

-     auto out_lod = subsequenceLoD(in, offsets, sizes);
+     auto lod = in->lod();
+     auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);

      x_grad->set_lod(lod);
      x_grad->mutable_data<T>(ctx.GetPlace());
-     auto temp = framework::EigenVector<T>::Flatten(*x_grad);
-     temp.device(ctx.GetEigenDevice<Place>()) = temp.constant(static_cast<T>(0));
+     math::SetConstant<Place, T> set_zero;
+     set_zero(ctx.device_context(), x_grad, static_cast<T>(0));

      auto out_grad_stride = framework::stride(out_grad->dims());

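For reference, a minimal standalone sketch (not part of the patch) of the index arithmetic that SequenceSliceLoD and the forward copy loop implement: the output LoD is the running sum of the per-sequence lengths, and sequence i is copied from input rows [lod[0][i] + offset[i], lod[0][i] + offset[i] + length[i]). The LoD, offset, and length values below are made-up examples chosen to satisfy the kernel's checks.

// Illustration only; uses no Paddle types, just the same arithmetic.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Input LoD for two sequences: rows [0, 3) and [3, 7).
  std::vector<size_t> lod = {0, 3, 7};
  std::vector<int64_t> offset = {1, 1};  // slice start inside each sequence
  std::vector<int64_t> length = {1, 2};  // number of rows to keep

  // Mirror of SequenceSliceLoD: output LoD is the running sum of lengths.
  std::vector<size_t> out_lod = {0};
  size_t lod_offset = 0;
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    lod_offset += length[i];
    out_lod.push_back(lod_offset);
  }

  // Mirror of the copy loop: sequence i covers input rows
  // [lod[i] + offset[i], lod[i] + offset[i] + length[i]).
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    std::printf("seq %zu: input rows [%zu, %zu) -> output rows [%zu, %zu)\n", i,
                lod[i] + offset[i], lod[i] + offset[i] + length[i],
                out_lod[i], out_lod[i + 1]);
  }
  return 0;
}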
@@ -139,11 +154,9 @@ class SubSequenceGradOpKernel : public framework::OpKernel<T> {

        auto x_grad_stride = framework::stride(x_grad->dims());

-       auto offset = offsets[i];
-       auto size = sizes[i];
-
-       Tensor x_grad_t = x_grad->Slice(static_cast<int>(lod[0][i] + offset),
-                                       static_cast<int>(lod[0][i] + offset + size));
+       Tensor x_grad_t = x_grad->Slice(
+           static_cast<int>(lod[0][i] + offset_data[i]),
+           static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

        StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>(),
                         out_grad_stride, out_grad_t.dims(), x_grad_stride,
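A similarly standalone sketch (again not part of the patch, with made-up values) of what the gradient kernel does per sequence: x_grad is zero-filled, then each sliced segment of out_grad is written back into the rows it was taken from, leaving the unsliced rows at zero.

// Illustration only; plain arrays stand in for the LoDTensors.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<size_t> lod = {0, 3, 7};   // input LoD (two sequences)
  std::vector<int64_t> offset = {1, 1};
  std::vector<int64_t> length = {1, 2};
  std::vector<float> out_grad = {10.f, 20.f, 30.f};  // one value per sliced row

  // Zero-fill the input gradient (the role of math::SetConstant).
  std::vector<float> x_grad(lod.back(), 0.f);

  // Scatter each gradient segment back to rows [lod[i] + offset[i], ...).
  size_t src = 0;
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    for (int64_t r = 0; r < length[i]; ++r) {
      x_grad[lod[i] + offset[i] + r] = out_grad[src++];
    }
  }

  for (size_t r = 0; r < x_grad.size(); ++r) {
    std::printf("x_grad[%zu] = %.1f\n", r, x_grad[r]);
  }
  return 0;
}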