@@ -28,8 +28,6 @@ class SequenceReshapeKernel : public framework::OpKernel<T> {
     auto* out = context.Output<LoDTensor>("Out");
     int out_width = context.Attr<int>("new_dim");
 
-    const T* p_in_data = in->data<T>();
-
     auto in_dims = in->dims();
     int64_t in_width = in_dims[1];
     auto& in_lod = in->lod();
@@ -43,53 +41,29 @@ class SequenceReshapeKernel : public framework::OpKernel<T> {
     auto in_lod_l0 = in_lod[0];
     int seq_num = in_lod_l0.size() - 1;
 
-    auto& out_lod = *out->mutable_lod();
-    out_lod.resize(1);
-    out_lod[0].clear();
-    out_lod[0].push_back(0);
-    for (int i = 0; i < seq_num; ++i) {
-      size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
-      size_t offset = 0;
-      offset = (seq_len * in_width) / out_width;
-      PADDLE_ENFORCE_EQ(offset * out_width, seq_len * in_width,
-                        "Please make sure (sequence_length * dimension) can be "
-                        "divided by new_dim with no remainder for each "
-                        "sequence. The %dth sequence is invalid.",
-                        i + 1);
-      PADDLE_ENFORCE_GT(offset, 0,
-                        "Illegal operation, length of the %dth sequence become "
-                        "to 0 after reshaped.",
-                        i + 1);
-      out_lod[0].push_back(out_lod[0].back() + offset);
+    if (in_width == out_width) {
+      out->set_lod(in->lod());
+    } else {
+      auto& out_lod = *out->mutable_lod();
+      out_lod.resize(1);
+      out_lod[0].clear();
+      out_lod[0].push_back(0);
+      for (int i = 0; i < seq_num; ++i) {
+        size_t seq_len = in_lod_l0[i + 1] - in_lod_l0[i];
+        size_t offset = 0;
+        offset = (seq_len * in_width) / out_width;
+        PADDLE_ENFORCE_EQ(offset * out_width, seq_len * in_width,
+                          "Please make sure (sequence_length * dimension) can "
+                          "be divided by new_dim with no remainder for each "
+                          "sequence. The %dth sequence is invalid.",
+                          i + 1);
+        out_lod[0].push_back(out_lod[0].back() + offset);
+      }
     }
 
     out->mutable_data<T>(context.GetPlace());
-    out->Resize({static_cast<int64_t>(out_lod[0].back()), out_width});
-    T* p_out_data = out->mutable_data<T>(context.GetPlace());
-    math::set_constant(context.device_context(), out, 0.0f);
-
-    for (int i = 0; i < seq_num; ++i) {
-      size_t in_offset = in_lod_l0[i] * in_width;
-      size_t out_offset = out_lod[0][i] * out_width;
-      size_t in_count = (in_lod_l0[i + 1] - in_lod_l0[i]) * in_width;
-      size_t out_count = (out_lod[0][i + 1] - out_lod[0][i]) * out_width;
-      size_t bytes = sizeof(T) * std::min(in_count, out_count);
-      if (platform::is_cpu_place(context.GetPlace())) {
-        memory::Copy(boost::get<platform::CPUPlace>(context.GetPlace()),
-                     p_out_data + out_offset,
-                     boost::get<platform::CPUPlace>(context.GetPlace()),
-                     p_in_data + in_offset, bytes);
-      } else {
-#ifdef PADDLE_WITH_CUDA
-        auto& dev_ctx =
-            context.template device_context<platform::CUDADeviceContext>();
-        memory::Copy(boost::get<platform::CUDAPlace>(context.GetPlace()),
-                     p_out_data + out_offset,
-                     boost::get<platform::CUDAPlace>(context.GetPlace()),
-                     p_in_data + in_offset, bytes, dev_ctx.stream());
-#endif
-      }
-    }
+    framework::Copy(*in, context.GetPlace(), out);
+    out->Resize({static_cast<int64_t>(out->lod()[0].back()), out_width});
   }
 };
 
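Note on the forward-kernel change above: when in_width equals out_width the input LoD is reused as-is; otherwise the level-0 LoD is recomputed so that each sequence's element count (length * in_width) is redistributed into rows of out_width, and the data itself is now moved with a single framework::Copy followed by Resize instead of per-sequence memory::Copy calls. Below is a minimal standalone sketch of that LoD arithmetic, using a plain std::vector in place of Paddle's LoD type; the function name and error handling are illustrative, not part of the patch.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <vector>

// Recompute a level-0 LoD when rows of width `in_width` are reinterpreted as
// rows of width `out_width`, mirroring the offset arithmetic in the kernel
// above. The vector-based LoD and the exception are illustrative only.
std::vector<size_t> ReshapeLoD(const std::vector<size_t>& in_lod,
                               int64_t in_width, int64_t out_width) {
  std::vector<size_t> out_lod = {0};
  for (size_t i = 0; i + 1 < in_lod.size(); ++i) {
    size_t seq_len = in_lod[i + 1] - in_lod[i];
    size_t offset = seq_len * in_width / out_width;
    if (offset * out_width != seq_len * in_width) {
      throw std::invalid_argument(
          "sequence_length * dimension must be divisible by new_dim");
    }
    out_lod.push_back(out_lod.back() + offset);
  }
  return out_lod;
}

int main() {
  // Two sequences of lengths 2 and 4, rows of width 6 reshaped to width 4:
  // 2 * 6 = 12 and 4 * 6 = 24 elements -> new sequence lengths 3 and 6.
  for (size_t v : ReshapeLoD({0, 2, 6}, /*in_width=*/6, /*out_width=*/4)) {
    std::printf("%zu ", v);  // prints: 0 3 9
  }
  std::printf("\n");
  return 0;
}
```

With lod = {0, 2, 6}, in_width = 6 and out_width = 4, the recomputed offsets are {0, 3, 9}, which is what the kernel writes into out_lod[0] before resizing the output to {9, 4}.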
@@ -98,45 +72,14 @@ class SequenceReshapeGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* x_tensor_ptr = context.Input<LoDTensor>("X");
-    auto* out_tensor_ptr = context.Input<LoDTensor>("Out");
-    auto* out_grad_tensor_ptr =
+    auto* outg_tensor_ptr =
         context.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto* x_grad_tensor_ptr =
+    auto* xg_tensor_ptr =
         context.Output<LoDTensor>(framework::GradVarName("X"));
 
-    T* p_x_grad_data = x_grad_tensor_ptr->mutable_data<T>(context.GetPlace());
-    const T* p_out_grad_data = out_grad_tensor_ptr->data<T>();
-
-    auto& x_lod = x_tensor_ptr->lod();
-    int seq_num = x_lod[0].size() - 1;
-    int x_width = x_tensor_ptr->dims()[1];
-    auto& out_lod = out_tensor_ptr->lod();
-    int out_width = out_tensor_ptr->dims()[1];
-
-    math::set_constant(context.device_context(), x_grad_tensor_ptr, 0.0f);
-
-    for (int i = 0; i < seq_num; ++i) {
-      size_t src_offset = out_lod[0][i] * out_width;
-      size_t dst_offset = x_lod[0][i] * x_width;
-      size_t src_count = (out_lod[0][i + 1] - out_lod[0][i]) * out_width;
-      size_t dst_count = (x_lod[0][i + 1] - x_lod[0][i]) * x_width;
-      size_t bytes = sizeof(T) * std::min(src_count, dst_count);
-      if (platform::is_cpu_place(context.GetPlace())) {
-        memory::Copy(boost::get<platform::CPUPlace>(context.GetPlace()),
-                     p_x_grad_data + dst_offset,
-                     boost::get<platform::CPUPlace>(context.GetPlace()),
-                     p_out_grad_data + src_offset, bytes);
-      } else {
-#ifdef PADDLE_WITH_CUDA
-        auto& dev_ctx =
-            context.template device_context<platform::CUDADeviceContext>();
-        memory::Copy(boost::get<platform::CUDAPlace>(context.GetPlace()),
-                     p_x_grad_data + dst_offset,
-                     boost::get<platform::CUDAPlace>(context.GetPlace()),
-                     p_out_grad_data + src_offset, bytes, dev_ctx.stream());
-#endif
-      }
-    }
+    xg_tensor_ptr->mutable_data<T>(context.GetPlace());
+    framework::Copy(*outg_tensor_ptr, context.GetPlace(), xg_tensor_ptr);
+    xg_tensor_ptr->Resize(x_tensor_ptr->dims());
   }
 };
 
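Note on the gradient-kernel change above: since the forward op only reinterprets the same contiguous buffer (the element count and ordering are unchanged), the gradient with respect to X is simply the Out gradient copied back verbatim and resized to X's dims, so the per-sequence offset bookkeeping and the separate CPU/CUDA memory::Copy branches can be replaced by a single framework::Copy plus Resize.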