Skip to content

Commit 9a0233d

Browse files
authored
Feature/tensor array lod pack (#5007)
1 parent 5d536bc commit 9a0233d

File tree

6 files changed

+323
-9
lines changed

6 files changed

+323
-9
lines changed

paddle/framework/lod_tensor.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ size_t LoDTensor::NumElements(size_t level, size_t idx) const {
106106
return lod_[level][idx + 1] - lod_[level][idx];
107107
}
108108

109+
// Returns how many underlying tensor instances (rows, in absolute offsets)
// the `idx`-th element of LoD `level` covers.
size_t LoDTensor::NumInstancesInElement(size_t level, size_t idx) const {
  PADDLE_ENFORCE_LT(level, NumLevels());
  PADDLE_ENFORCE_LT(idx, NumElements(level));
  const auto abs_offsets = ToAbsOffset(lod());
  return abs_offsets[level][idx + 1] - abs_offsets[level][idx];
}
117+
109118
void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {
110119
auto new_lod = framework::SliceLevels(lod_, level_begin, level_end);
111120
lod_ = new_lod;
@@ -117,8 +126,15 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
117126
PADDLE_ENFORCE_LT(elem_begin, NumElements(level));
118127
PADDLE_ENFORCE_LT(elem_end, NumElements(level) + 1);
119128

129+
auto abs_lod = framework::ToAbsOffset(lod());
120130
auto new_lod = framework::SliceInLevel(lod_, level, elem_begin, elem_end);
121131
lod_ = new_lod;
132+
133+
// slice the underlying tensor
134+
size_t begin = abs_lod[level][elem_begin];
135+
size_t end = abs_lod[level][elem_end];
136+
PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty.");
137+
ShareDataWith(Slice(begin, end));
122138
}
123139

124140
std::string LoDTensor::SerializeToString() const {

paddle/framework/lod_tensor.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ class LoDTensor : public Tensor {
122122
*/
123123
size_t NumElements(size_t level, size_t idx) const;
124124

125+
/*
126+
* Get the number of instances in the underlying tensor in the `idx`-th
127+
* element.
128+
*/
129+
size_t NumInstancesInElement(size_t level, size_t idx) const;
130+
125131
/*
126132
* Shrink levels[level_begin:level_end]
127133
*/
@@ -157,5 +163,42 @@ class LoDTensor : public Tensor {
157163
private:
158164
LoD lod_;
159165
};
166+
167+
/*
168+
* Expand the `source` to fit the LoD of `lod`. For example, a `source`
169+
* LoDTensor is
170+
* - LoD: [0, 2]
171+
* - tensor: [a0, a1]
172+
* a `lod` is
173+
* - LoD: [0 3 5]
174+
* returns a new LoDTensor
175+
* - [a0 a0 a0 a1 a1]
176+
*/
177+
template <typename T>
178+
LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
179+
const platform::Place& place) {
180+
LoD abs_lod = ToAbsOffset(lod);
181+
const auto& lod_level = lod[level];
182+
size_t num_instances = source.dims()[0];
183+
184+
// new tensor
185+
LoDTensor tensor;
186+
tensor.set_lod(lod);
187+
auto dims = source.dims();
188+
dims[0] = lod_level.back();
189+
tensor.Resize(dims);
190+
tensor.mutable_data<T>(place);
191+
192+
PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
193+
for (size_t ins = 0; ins < num_instances; ins++) {
194+
for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
195+
tensor.Slice(elem, elem + 1)
196+
.CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
197+
platform::CPUDeviceContext());
198+
}
199+
}
200+
return tensor;
201+
}
202+
160203
} // namespace framework
161204
} // namespace paddle

paddle/framework/lod_tensor_test.cc

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,19 +92,56 @@ TEST_F(LoDTensorTester, ShrinkInLevel) {
9292
size_t level = 0;
9393
LoDTensor new_lod_tensor = lod_tensor_;
9494
new_lod_tensor.ShrinkInLevel(level, 0, 1);
95-
EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
96-
EXPECT_EQ(new_lod_tensor.NumElements(0), 1UL);
97-
EXPECT_EQ(new_lod_tensor.NumElements(1), 2UL);
98-
EXPECT_EQ(new_lod_tensor.NumElements(2), 5UL);
99-
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
95+
ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL);
96+
ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
97+
ASSERT_EQ(new_lod_tensor.NumElements(1), 2UL);
98+
ASSERT_EQ(new_lod_tensor.NumElements(2), 5UL);
99+
ASSERT_EQ(new_lod_tensor.dims()[0], 12);
100+
for (int i = 0; i < 12 * 128; i++) {
101+
ASSERT_EQ(new_lod_tensor.data<float>()[i], i);
102+
}
100103

101104
level = 1;
102105
new_lod_tensor = lod_tensor_;
103106
new_lod_tensor.ShrinkInLevel(level, 1, 2);
104107
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
105108
ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
106109
ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL);
107-
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
110+
ASSERT_EQ(new_lod_tensor.dims()[0], 7);
111+
for (int i = 5 * 128; i < 12 * 128; i++) {
112+
ASSERT_EQ(new_lod_tensor.data<float>()[i - 5 * 128], i);
113+
}
114+
115+
LoDTensor t1;
116+
t1.set_lod(lod_tensor_.lod());
117+
t1.ShareDataWith(lod_tensor_);
118+
119+
LoDTensor t2;
120+
t2.set_lod(lod_tensor_.lod());
121+
t2.ShareDataWith(lod_tensor_);
122+
123+
t1.ShrinkInLevel(0, 1, 2);
124+
t2.ShrinkInLevel(0, 0, 1);
125+
EXPECT_NE(t1.data<float>(), t2.data<float>());
126+
EXPECT_NE(t1.data<float>(), lod_tensor_.data<float>());
127+
}
128+
129+
TEST(LodExpand, test) {
  // Source: two instances [0, 1] with a single-level LoD [0, 2].
  LoD src_lod{{0, 2}};
  LoDTensor src;
  src.set_lod(src_lod);
  src.Resize({2, 1});
  src.mutable_data<float>(platform::CPUPlace());
  src.data<float>()[0] = 0;
  src.data<float>()[1] = 1;

  // Target LoD [0, 3, 5]: first instance repeated 3 times, second 2 times.
  LoD target;
  target.emplace_back(std::vector<size_t>{0, 3, 5});
  auto expanded = LodExpand<float>(src, target, 0UL, platform::CPUPlace());

  std::vector<int> expected{{0, 0, 0, 1, 1}};
  for (size_t i = 0; i < 5; i++) {
    ASSERT_EQ(expanded.data<float>()[i], expected[i]);
  }
}
109146

110147
TEST_F(LoDTensorTester, SerializeDeserialize) {

paddle/framework/tensor_array.cc

Lines changed: 156 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <algorithm>
2121
#include <limits>
2222

23+
#include "paddle/framework/eigen.h"
24+
2325
namespace paddle {
2426
namespace framework {
2527

@@ -104,10 +106,10 @@ void TensorArray::Write(size_t index, const LoDTensor& value) {
104106
values_.resize(index + 1);
105107
}
106108

109+
values_[index].set_lod(value.lod());
107110
values_[index].Resize(value.dims());
108-
values_[index].mutable_data<value_type>(platform::CPUPlace());
109-
values_[index].CopyFrom(value, platform::CPUPlace(),
110-
platform::CPUDeviceContext());
111+
values_[index].mutable_data<value_type>(value.place());
112+
values_[index].CopyFrom(value, value.place(), platform::CPUDeviceContext());
111113
}
112114

113115
void TensorArray::WriteShared(size_t index, const LoDTensor& value) {
@@ -116,6 +118,7 @@ void TensorArray::WriteShared(size_t index, const LoDTensor& value) {
116118
values_.resize(index + 1);
117119
}
118120

121+
values_[index].set_lod(value.lod());
119122
values_[index].ShareDataWith(value);
120123
}
121124

@@ -144,6 +147,156 @@ DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
144147
return unpacker.meta;
145148
}
146149

150+
// Fold all time steps into one LoDTensor by repeatedly packing consecutive
// steps with LodPackTwo. Requires at least one step; every step must share
// the same number of LoD levels and the same number of elements at `level`.
LoDTensor TensorArray::LodPack(size_t level) const {
  PADDLE_ENFORCE_GT(size(), 0UL, "no time step exists");
  // With a single step there is nothing to merge; return it directly.
  // (Otherwise the loop below never runs and a default-constructed,
  // empty `merged` would be returned.)
  if (size() == 1UL) return Read(0);

  // the levels should be no less than 2
  LoDTensor merged;
  const LoDTensor *pre, *cur;
  pre = &Read(0);

  for (size_t step = 1; step < size(); step++) {
    cur = &Read(step);
    PADDLE_ENFORCE_GT(cur->NumLevels(), 0);
    PADDLE_ENFORCE_GT(pre->NumLevels(), 0);
    PADDLE_ENFORCE_EQ(pre->NumLevels(), cur->NumLevels());
    PADDLE_ENFORCE_EQ(pre->NumElements(level), cur->NumElements(level));

    merged = LodPackTwo(*pre, *cur, level);
    pre = &merged;  // the running result becomes the prefix for the next step
  }
  return merged;
}
169+
170+
/*
171+
* NOTE currently, only the lowest level supports packing.
172+
* The lowest LoD will be changed, while the relative offsets in levels above
173+
* stay unchanged.
174+
*
175+
* previous step : [0] [1] [3]
176+
* current step: [0 1 2] [2 3] []
177+
* packed to
178+
* [0 0] [0 1] [0 2] [1 2] [1 3] [3]
179+
*/
180+
LoDTensor TensorArray::LodPackTwo(const LoDTensor& pre, const LoDTensor& cur,
181+
size_t level) const {
182+
PADDLE_ENFORCE_EQ(pre.NumLevels(), cur.NumLevels());
183+
PADDLE_ENFORCE_EQ(pre.NumLevels(), level + 1,
184+
"Only the lowest LoD level supports pack temporarily.");
185+
// calculate the result tensor's shape first
186+
size_t num_instances = 0;
187+
for (size_t elem = 0; elem < pre.NumElements(level); elem++) {
188+
size_t prefix_size = pre.NumElements(level, elem);
189+
size_t num_candidates = cur.NumElements(level, elem);
190+
if (num_candidates > 0) {
191+
num_instances += num_candidates * (prefix_size + 1);
192+
} else {
193+
num_instances += prefix_size;
194+
}
195+
}
196+
197+
auto res_dims = pre.dims();
198+
res_dims[0] = num_instances;
199+
LoDTensor result;
200+
result.Resize(res_dims);
201+
result.mutable_data<value_type>(cur.place());
202+
203+
Vector<size_t> last_lod_level;
204+
// copy data
205+
size_t index = 0;
206+
last_lod_level.push_back(index);
207+
for (size_t elem = 0; elem < pre.NumElements(level); elem++) {
208+
size_t prefix_size = pre.NumElements(level, elem);
209+
size_t num_candidates = cur.NumElements(level, elem);
210+
211+
// slice the prefix Tensor
212+
LoDTensor prefix = pre;
213+
prefix.ShrinkInLevel(level, elem, elem + 1);
214+
LoDTensor candidate = cur;
215+
if (num_candidates > 0) {
216+
candidate.ShrinkInLevel(level, elem, elem + 1);
217+
} else { // just push prefix
218+
result.Slice(index, index + prefix_size)
219+
.CopyFrom(prefix, result.place(), platform::CPUDeviceContext());
220+
index += prefix_size;
221+
last_lod_level.push_back(index);
222+
}
223+
for (size_t candi = 0; candi < num_candidates; candi++) {
224+
// TODO(superjom) support GPU
225+
result.Slice(index, index + prefix_size)
226+
.CopyFrom(prefix, result.place(), platform::CPUDeviceContext());
227+
index += prefix_size;
228+
// copy candidate record
229+
result.Slice(index, index + 1)
230+
.CopyFrom(candidate.Slice(candi, candi + 1), result.place(),
231+
platform::CPUDeviceContext());
232+
index++;
233+
last_lod_level.push_back(index);
234+
}
235+
}
236+
237+
// update lod
238+
auto lod = cur.lod();
239+
lod.back() = last_lod_level;
240+
result.set_lod(lod);
241+
return result;
242+
}
243+
244+
/*
245+
* source [0 1 2] [3 4] [5 6 7] will be transformd to a list of LoDTensors such
246+
* as
247+
* [0 3 5] [1 4 6] [2 7] with 1-level LoDs:
248+
* - [0 1 2 3]
249+
* - [0 1 2 3]
250+
* - [0 1 1 2], the [1,1) here means the second sequence is empty
251+
*
252+
* NOTE Unpack a LoDTensor in this approach may result in a big LoD.
253+
*/
254+
void TensorArray::LodUnpack(const LoDTensor& source, size_t level) {
255+
PADDLE_ENFORCE_EQ(level, source.NumLevels() - 1,
256+
"only the lowest LoD level supports unpack.");
257+
int non_empty_instances = -1;
258+
size_t index = 0;
259+
Vector<size_t> lowest_lod_level;
260+
lowest_lod_level.push_back(index);
261+
262+
for (size_t step = 0; non_empty_instances > 0 || non_empty_instances == -1;
263+
step++) {
264+
size_t num_instances = 0;
265+
for (size_t id = 0; id < source.NumElements(level); id++) {
266+
auto instance = source;
267+
instance.ShrinkInLevel(level, id, id + 1);
268+
if (static_cast<size_t>(instance.dims()[0]) > step) {
269+
num_instances++;
270+
index++;
271+
}
272+
lowest_lod_level.push_back(index);
273+
}
274+
275+
// create tensor for this time step
276+
LoDTensor tensor;
277+
auto dims = source.dims();
278+
dims[0] = num_instances;
279+
// set lod
280+
auto lod = source.lod();
281+
lod.back() = lowest_lod_level;
282+
tensor.set_lod(lod);
283+
284+
index = 0;
285+
for (size_t id = 0; id < source.NumElements(level); id++) {
286+
auto instance = source;
287+
instance.ShrinkInLevel(level, id, id + 1);
288+
if (static_cast<size_t>(instance.dims()[0]) > step) {
289+
// copy this instance
290+
tensor.Slice(index, index + 1)
291+
.CopyFrom(instance.Slice(step, step + 1), tensor.place(),
292+
platform::CPUDeviceContext());
293+
index++;
294+
}
295+
}
296+
Write(step, tensor);
297+
}
298+
}
299+
147300
LoDTensor TensorArray::Stack() const {
148301
LoDTensor result;
149302
if (size() == 0) return result;

paddle/framework/tensor_array.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ class TensorArray {
8686
*/
8787
DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);
8888

89+
/*
90+
* Pack an array of LoDTensors to a LoDTensor.
91+
*/
92+
LoDTensor LodPack(size_t level) const;
93+
94+
/*
95+
* Unpack a LoDTensor to an array of LoDTensors.
96+
*/
97+
void LodUnpack(const LoDTensor &source, size_t level);
98+
8999
/*
90100
* Pack the values into a tensor with rank one higher than each tensor in
91101
* values.
@@ -111,6 +121,9 @@ class TensorArray {
111121
protected:
112122
void Unstack(const LoDTensor &source, bool data_shared) const;
113123

124+
LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur,
125+
size_t level) const;
126+
114127
private:
115128
mutable std::vector<LoDTensor> values_;
116129
}; // class TensorArray

0 commit comments

Comments
 (0)