20
20
#include < algorithm>
21
21
#include < limits>
22
22
23
+ #include " paddle/framework/eigen.h"
24
+
23
25
namespace paddle {
24
26
namespace framework {
25
27
@@ -104,10 +106,10 @@ void TensorArray::Write(size_t index, const LoDTensor& value) {
104
106
values_.resize (index + 1 );
105
107
}
106
108
109
+ values_[index].set_lod (value.lod ());
107
110
values_[index].Resize (value.dims ());
108
- values_[index].mutable_data <value_type>(platform::CPUPlace ());
109
- values_[index].CopyFrom (value, platform::CPUPlace (),
110
- platform::CPUDeviceContext ());
111
+ values_[index].mutable_data <value_type>(value.place ());
112
+ values_[index].CopyFrom (value, value.place (), platform::CPUDeviceContext ());
111
113
}
112
114
113
115
void TensorArray::WriteShared (size_t index, const LoDTensor& value) {
@@ -116,6 +118,7 @@ void TensorArray::WriteShared(size_t index, const LoDTensor& value) {
116
118
values_.resize (index + 1 );
117
119
}
118
120
121
+ values_[index].set_lod (value.lod ());
119
122
values_[index].ShareDataWith (value);
120
123
}
121
124
@@ -144,6 +147,156 @@ DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
144
147
return unpacker.meta ;
145
148
}
146
149
150
+ LoDTensor TensorArray::LodPack (size_t level) const {
151
+ PADDLE_ENFORCE_GT (size (), 0UL , " no time step exists" );
152
+ // the levels should be no less than 2
153
+ LoDTensor merged;
154
+ const LoDTensor *pre, *cur;
155
+ pre = &Read (0 );
156
+
157
+ for (size_t step = 1 ; step < size (); step++) {
158
+ cur = &Read (step);
159
+ PADDLE_ENFORCE_GT (cur->NumLevels (), 0 );
160
+ PADDLE_ENFORCE_GT (pre->NumLevels (), 0 );
161
+ PADDLE_ENFORCE_EQ (pre->NumLevels (), cur->NumLevels ());
162
+ PADDLE_ENFORCE_EQ (pre->NumElements (level), cur->NumElements (level));
163
+
164
+ merged = LodPackTwo (*pre, *cur, level);
165
+ pre = &merged;
166
+ }
167
+ return merged;
168
+ }
169
+
170
+ /*
171
+ * NOTE currently, only the lowest level supports packing.
172
+ * The lowest LoD will be changed, while the relative offsets in levels above
173
+ * stay unchanged.
174
+ *
175
+ * previous step : [0] [1] [3]
176
+ * current step: [0 1 2] [2 3] []
177
+ * packed to
178
+ * [0 0] [0 1] [0 2] [1 2] [1 3] [3]
179
+ */
180
+ LoDTensor TensorArray::LodPackTwo (const LoDTensor& pre, const LoDTensor& cur,
181
+ size_t level) const {
182
+ PADDLE_ENFORCE_EQ (pre.NumLevels (), cur.NumLevels ());
183
+ PADDLE_ENFORCE_EQ (pre.NumLevels (), level + 1 ,
184
+ " Only the lowest LoD level supports pack temporarily." );
185
+ // calculate the result tensor's shape first
186
+ size_t num_instances = 0 ;
187
+ for (size_t elem = 0 ; elem < pre.NumElements (level); elem++) {
188
+ size_t prefix_size = pre.NumElements (level, elem);
189
+ size_t num_candidates = cur.NumElements (level, elem);
190
+ if (num_candidates > 0 ) {
191
+ num_instances += num_candidates * (prefix_size + 1 );
192
+ } else {
193
+ num_instances += prefix_size;
194
+ }
195
+ }
196
+
197
+ auto res_dims = pre.dims ();
198
+ res_dims[0 ] = num_instances;
199
+ LoDTensor result;
200
+ result.Resize (res_dims);
201
+ result.mutable_data <value_type>(cur.place ());
202
+
203
+ Vector<size_t > last_lod_level;
204
+ // copy data
205
+ size_t index = 0 ;
206
+ last_lod_level.push_back (index);
207
+ for (size_t elem = 0 ; elem < pre.NumElements (level); elem++) {
208
+ size_t prefix_size = pre.NumElements (level, elem);
209
+ size_t num_candidates = cur.NumElements (level, elem);
210
+
211
+ // slice the prefix Tensor
212
+ LoDTensor prefix = pre;
213
+ prefix.ShrinkInLevel (level, elem, elem + 1 );
214
+ LoDTensor candidate = cur;
215
+ if (num_candidates > 0 ) {
216
+ candidate.ShrinkInLevel (level, elem, elem + 1 );
217
+ } else { // just push prefix
218
+ result.Slice (index, index + prefix_size)
219
+ .CopyFrom (prefix, result.place (), platform::CPUDeviceContext ());
220
+ index += prefix_size;
221
+ last_lod_level.push_back (index);
222
+ }
223
+ for (size_t candi = 0 ; candi < num_candidates; candi++) {
224
+ // TODO(superjom) support GPU
225
+ result.Slice (index, index + prefix_size)
226
+ .CopyFrom (prefix, result.place (), platform::CPUDeviceContext ());
227
+ index += prefix_size;
228
+ // copy candidate record
229
+ result.Slice (index, index + 1 )
230
+ .CopyFrom (candidate.Slice (candi, candi + 1 ), result.place (),
231
+ platform::CPUDeviceContext ());
232
+ index++;
233
+ last_lod_level.push_back (index);
234
+ }
235
+ }
236
+
237
+ // update lod
238
+ auto lod = cur.lod ();
239
+ lod.back () = last_lod_level;
240
+ result.set_lod (lod);
241
+ return result;
242
+ }
243
+
244
+ /*
245
+ * source [0 1 2] [3 4] [5 6 7] will be transformd to a list of LoDTensors such
246
+ * as
247
+ * [0 3 5] [1 4 6] [2 7] with 1-level LoDs:
248
+ * - [0 1 2 3]
249
+ * - [0 1 2 3]
250
+ * - [0 1 1 2], the [1,1) here means the second sequence is empty
251
+ *
252
+ * NOTE Unpack a LoDTensor in this approach may result in a big LoD.
253
+ */
254
+ void TensorArray::LodUnpack (const LoDTensor& source, size_t level) {
255
+ PADDLE_ENFORCE_EQ (level, source.NumLevels () - 1 ,
256
+ " only the lowest LoD level supports unpack." );
257
+ int non_empty_instances = -1 ;
258
+ size_t index = 0 ;
259
+ Vector<size_t > lowest_lod_level;
260
+ lowest_lod_level.push_back (index);
261
+
262
+ for (size_t step = 0 ; non_empty_instances > 0 || non_empty_instances == -1 ;
263
+ step++) {
264
+ size_t num_instances = 0 ;
265
+ for (size_t id = 0 ; id < source.NumElements (level); id++) {
266
+ auto instance = source;
267
+ instance.ShrinkInLevel (level, id, id + 1 );
268
+ if (static_cast <size_t >(instance.dims ()[0 ]) > step) {
269
+ num_instances++;
270
+ index++;
271
+ }
272
+ lowest_lod_level.push_back (index);
273
+ }
274
+
275
+ // create tensor for this time step
276
+ LoDTensor tensor;
277
+ auto dims = source.dims ();
278
+ dims[0 ] = num_instances;
279
+ // set lod
280
+ auto lod = source.lod ();
281
+ lod.back () = lowest_lod_level;
282
+ tensor.set_lod (lod);
283
+
284
+ index = 0 ;
285
+ for (size_t id = 0 ; id < source.NumElements (level); id++) {
286
+ auto instance = source;
287
+ instance.ShrinkInLevel (level, id, id + 1 );
288
+ if (static_cast <size_t >(instance.dims ()[0 ]) > step) {
289
+ // copy this instance
290
+ tensor.Slice (index, index + 1 )
291
+ .CopyFrom (instance.Slice (step, step + 1 ), tensor.place (),
292
+ platform::CPUDeviceContext ());
293
+ index++;
294
+ }
295
+ }
296
+ Write (step, tensor);
297
+ }
298
+ }
299
+
147
300
LoDTensor TensorArray::Stack () const {
148
301
LoDTensor result;
149
302
if (size () == 0 ) return result;
0 commit comments