Skip to content

Commit 417fcf4

Browse files
authored
Modify Pybind LoDTensor API according to length-based LoD (#11106)
* add lod_tensor util and modify pybind * refine pybind LoDTensor API and modify LoDTensor and DataFeeder test * fix test error * fix detection map op test * fix reorder_lod_tensor test * fix seq_concat_op * fix chunk eval op test * fix target assign op * fix warp ctc op * address comments step 1: reverse reset_lod op * step 2: modify op test * add warning message * remove has_valid_lod * add back has_valid_lod * address comments * add exception catching trial
1 parent 53d1d0f commit 417fcf4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+765
-635
lines changed

benchmark/fluid/models/machine_translation.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -173,21 +173,6 @@ def simple_attention(encoder_vec, encoder_proj, decoder_state):
173173
return avg_cost, feeding_list
174174

175175

176-
def to_lodtensor(data, place):
177-
seq_lens = [len(seq) for seq in data]
178-
cur_len = 0
179-
lod = [cur_len]
180-
for l in seq_lens:
181-
cur_len += l
182-
lod.append(cur_len)
183-
flattened_data = np.concatenate(data, axis=0).astype("int64")
184-
flattened_data = flattened_data.reshape([len(flattened_data), 1])
185-
lod_t = core.LoDTensor()
186-
lod_t.set(flattened_data, place)
187-
lod_t.set_lod([lod])
188-
return lod_t, lod[-1]
189-
190-
191176
def lodtensor_to_ndarray(lod_tensor):
192177
dims = lod_tensor.get_dims()
193178
ndarray = np.zeros(shape=dims).astype('float32')

benchmark/fluid/models/stacked_dynamic_lstm.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -125,18 +125,3 @@ def gate_common(
125125
batch_size=args.batch_size)
126126

127127
return loss, inference_program, adam, train_reader, test_reader, batch_acc
128-
129-
130-
def to_lodtensor(data, place):
131-
seq_lens = [len(seq) for seq in data]
132-
cur_len = 0
133-
lod = [cur_len]
134-
for l in seq_lens:
135-
cur_len += l
136-
lod.append(cur_len)
137-
flattened_data = numpy.concatenate(data, axis=0).astype("int64")
138-
flattened_data = flattened_data.reshape([len(flattened_data), 1])
139-
res = fluid.LoDTensor()
140-
res.set(flattened_data, place)
141-
res.set_lod([lod])
142-
return res

paddle/fluid/framework/lod_tensor.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
410410
}
411411
}
412412

413+
// Convert an offset-based LoD into the equivalent length-based LoD.
//
// Each output level holds the differences between consecutive offsets of
// the matching input level, so a level with k offsets yields k - 1 lengths.
// Example: [[0, 2, 3], [0, 3, 5, 9]] -> [[2, 1], [3, 2, 4]].
//
// A level with fewer than two offsets (including an empty level) produces
// an empty output level.
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
  LoD length_lod;
  length_lod.reserve(offset_lod.size());
  for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
    const size_t offset_count = offset_lod[lvl].size();
    std::vector<size_t> level;
    if (offset_count > 0) {
      level.reserve(offset_count - 1);
    }
    // Compare `idx + 1 < offset_count` instead of `idx < offset_count - 1`:
    // size_t is unsigned, so the subtraction wraps around to SIZE_MAX for an
    // empty level and the loop would read far out of bounds.
    for (size_t idx = 0; idx + 1 < offset_count; ++idx) {
      level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
    }
    length_lod.push_back(level);
  }
  return length_lod;
}
428+
429+
// Convert a length-based LoD into the equivalent offset-based LoD.
//
// Every output level starts at offset 0 and is followed by the running sum
// of the lengths in the matching input level, so a level with k lengths
// yields k + 1 offsets.
// Example: [[2, 1], [3, 2, 4]] -> [[0, 2, 3], [0, 3, 5, 9]].
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
  LoD offset_lod;
  offset_lod.reserve(length_lod.size());
  for (const auto &lengths : length_lod) {
    const size_t num_lengths = lengths.size();
    std::vector<size_t> offsets;
    offsets.reserve(num_lengths + 1);
    size_t running_total = 0;
    offsets.push_back(running_total);
    for (size_t i = 0; i < num_lengths; ++i) {
      running_total += lengths[i];
      offsets.push_back(running_total);
    }
    offset_lod.push_back(offsets);
  }
  return offset_lod;
}
445+
413446
} // namespace framework
414447
} // namespace paddle

paddle/fluid/framework/lod_tensor.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
226226
extern std::vector<LoDTensor> ReadFromRecordIO(
227227
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
228228

229+
/*
230+
* Convert between length-based LoD and offset-based LoD.
231+
* The implementation of the LoDTensor class uses offset-based LoD.
232+
* However, we want to expose the more user-friendly length-based
233+
* LoD to the Python side instead.
234+
*
235+
* Example:
236+
* If offset_lod = [[0, 2, 3], [0, 3, 5, 9]]
237+
* then length_lod = [[2, 1], [3, 2, 4]]
238+
*/
239+
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
240+
241+
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
242+
229243
} // namespace framework
230244
} // namespace paddle

paddle/fluid/framework/lod_tensor_test.cc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
228228
ASSERT_FALSE(CheckAbsLoD(abs_lod0));
229229
}
230230

231+
// Verifies that a three-level offset-based LoD is converted into the
// per-level differences between consecutive offsets.
TEST(LoD, ConvertToLengthBasedLoD) {
  const std::vector<std::vector<size_t>> offset_levels = {
      {0, 2}, {0, 1, 3}, {0, 2, 4, 5}};
  const std::vector<std::vector<size_t>> expected_levels = {
      {2}, {1, 2}, {2, 2, 1}};

  LoD offset_lod;
  for (const auto &level : offset_levels) {
    offset_lod.push_back(level);
  }

  LoD expected;
  for (const auto &level : expected_levels) {
    expected.push_back(level);
  }

  LoD length_lod = ConvertToLengthBasedLoD(offset_lod);

  EXPECT_EQ(length_lod, expected);
}
246+
247+
// Verifies that a three-level length-based LoD is converted into per-level
// running sums that start at offset 0.
TEST(LoD, ConvertToOffsetBasedLoD) {
  const std::vector<std::vector<size_t>> length_levels = {
      {2}, {1, 2}, {2, 2, 1}};
  const std::vector<std::vector<size_t>> expected_levels = {
      {0, 2}, {0, 1, 3}, {0, 2, 4, 5}};

  LoD length_lod;
  for (const auto &level : length_levels) {
    length_lod.push_back(level);
  }

  LoD expected;
  for (const auto &level : expected_levels) {
    expected.push_back(level);
  }

  LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);

  EXPECT_EQ(offset_lod, expected);
}
262+
231263
template <typename T>
232264
static void TestRecordIO() {
233265
LoDTensor tensor;

paddle/fluid/pybind/pybind.cc

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) {
144144
py::class_<LoDTensor, Tensor>(m, "LoDTensor")
145145
.def_buffer(
146146
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
147-
.def(
148-
"__init__",
149-
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
150-
LoD new_lod;
151-
new_lod.reserve(lod.size());
152-
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
153-
new (&instance) LoDTensor(new_lod);
154-
})
147+
.def("__init__",
148+
[](LoDTensor &instance, const std::vector<std::vector<size_t>>
149+
&recursive_sequence_lengths) {
150+
LoD new_lod;
151+
new_lod.reserve(recursive_sequence_lengths.size());
152+
std::copy(recursive_sequence_lengths.begin(),
153+
recursive_sequence_lengths.end(),
154+
std::back_inserter(new_lod));
155+
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
156+
PADDLE_ENFORCE(
157+
CheckLoD(new_offset_lod, -1),
158+
"the provided recursive_sequence_lengths info is invalid");
159+
new (&instance) LoDTensor(new_offset_lod);
160+
})
155161
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
156162
.def("set_lod",
157163
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
164+
// the input lod is offset-based level-of-detail info
165+
LOG(WARNING)
166+
<< "set_lod is deprecated and will be removed by 9.2018, "
167+
"please switch to set_recursive_sequence_lengths.";
158168
LoD new_lod;
159169
new_lod.reserve(lod.size());
160170
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
171+
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
172+
"the provided lod info is invalid");
161173
self.set_lod(new_lod);
162174
})
163-
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
164-
auto lod = self.lod();
165-
std::vector<std::vector<size_t>> new_lod;
166-
new_lod.reserve(lod.size());
167-
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
168-
return new_lod;
175+
.def("set_recursive_sequence_lengths",
176+
[](LoDTensor &self, const std::vector<std::vector<size_t>>
177+
&recursive_sequence_lengths) {
178+
// the input recursive_sequence_lengths is length-based
179+
// level-of-detail info
180+
LoD new_lod;
181+
new_lod.reserve(recursive_sequence_lengths.size());
182+
std::copy(recursive_sequence_lengths.begin(),
183+
recursive_sequence_lengths.end(),
184+
std::back_inserter(new_lod));
185+
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
186+
PADDLE_ENFORCE(
187+
CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
188+
"the provided recursive_sequence_lengths info is invalid");
189+
self.set_lod(new_offset_lod);
190+
})
191+
.def("lod",
192+
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
193+
// output the offset-based lod info
194+
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
195+
"please switch to recursive_sequence_lengths.";
196+
LoD lod = self.lod();
197+
std::vector<std::vector<size_t>> new_lod;
198+
new_lod.reserve(lod.size());
199+
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
200+
return new_lod;
201+
})
202+
.def("recursive_sequence_lengths",
203+
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
204+
// output the length-based lod info
205+
LoD lod = ConvertToLengthBasedLoD(self.lod());
206+
std::vector<std::vector<size_t>> new_lod;
207+
new_lod.reserve(lod.size());
208+
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
209+
return new_lod;
210+
})
211+
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool {
212+
// Check that the lod info is valid and match the outermost
213+
// dimension of the LoDTensor data
214+
return CheckLoD(self.lod(), vectorize(self.dims()).front());
169215
});
170216

171217
py::class_<SelectedRows>(m, "SelectedRows")

python/paddle/fluid/data_feeder.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def __init__(self, place, lod_level, shape, dtype):
4747
self.lod = []
4848

4949
for i in six.range(lod_level):
50-
self.lod.append([0])
50+
self.lod.append([])
5151

5252
def feed(self, data):
5353
self._feed_impl_(data, self.lod, self.lod_level)
@@ -56,8 +56,7 @@ def _feed_impl_(self, data, lod, lod_level):
5656
if lod_level == 0:
5757
self.data.append(data)
5858
else:
59-
cur_lod_len = len(data)
60-
lod[0].append(lod[0][-1] + cur_lod_len)
59+
lod[0].append(len(data))
6160
for each_data in data:
6261
self._feed_impl_(each_data, lod[1:], lod_level - 1)
6362

@@ -66,7 +65,7 @@ def done(self):
6665
t = core.LoDTensor()
6766
t.set(arr, self.place)
6867
if self.lod_level > 0:
69-
t.set_lod(self.lod)
68+
t.set_recursive_sequence_lengths(self.lod)
7069
return t
7170

7271

python/paddle/fluid/lod_tensor.py

Lines changed: 4 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -18,80 +18,6 @@
1818
__all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
1919

2020

21-
def _validate_lod(lod, tensor_height=-1):
22-
"""Check whether the input length-based lod info is valid.
23-
24-
There are several things to check:
25-
1. lod should be a list of lists. Empty list is fine.
26-
2. The length of each sublist (a lod level) should be at least one.
27-
3. Each element in each lod level should be an integer greater than 0.
28-
4. The sum of one lod level should be equal to the length of the next lod level.
29-
5. The sum of the last lod level should be equal to the tensor height.
30-
Bypass this check if user does not provide tensor_height as input.
31-
32-
Args:
33-
lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]].
34-
tensor_height: the outermost dimension of the tensor with which the input
35-
lod is associated with.
36-
37-
Returns:
38-
A boolean indicating whether the input lod is valid or not.
39-
"""
40-
assert isinstance(lod, list), "lod should be a list"
41-
# Empty lod is fine
42-
if len(lod) == 0:
43-
return True
44-
45-
lod_sum = []
46-
for level in lod:
47-
assert isinstance(level, list), "each item in lod should be a list"
48-
# Each level of lod should have at least one length info
49-
if len(level) < 1:
50-
return False
51-
level_sum = 0
52-
for lod_len in level:
53-
# Each length in a level should be > 0
54-
if lod_len <= 0:
55-
return False
56-
level_sum += lod_len
57-
lod_sum.append(level_sum)
58-
59-
for idx, val in enumerate(lod_sum[:-1]):
60-
# Each level's sum should be equal to
61-
# the number of items in the next level
62-
if val != len(lod[idx + 1]):
63-
return False
64-
65-
if tensor_height == -1:
66-
return True
67-
else:
68-
# Last level's sum should be equal to the tensor height
69-
return lod_sum[-1] == tensor_height
70-
71-
72-
def _convert_lod(lod):
73-
"""Convert a length-based lod to a offset-based lod.
74-
75-
If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]],
76-
then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]].
77-
78-
Args:
79-
lod: a length-based lod info.
80-
81-
Returns:
82-
A list of lists as the offset-based lod converted to from the input lod.
83-
"""
84-
new_lod = []
85-
for level in lod:
86-
cur_len = 0
87-
new_level = [cur_len]
88-
for lod_len in level:
89-
cur_len += lod_len
90-
new_level.append(cur_len)
91-
new_lod.append(new_level)
92-
return new_lod
93-
94-
9521
def create_lod_tensor(data, lod, place):
9622
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor.
9723
@@ -139,11 +65,11 @@ def create_lod_tensor(data, lod, place):
13965
flattened_data = flattened_data.reshape([len(flattened_data), 1])
14066
return create_lod_tensor(flattened_data, lod, place)
14167
elif isinstance(data, np.ndarray):
142-
assert _validate_lod(lod,
143-
data.shape[0]), "the provided lod info is invalid"
14468
tensor = core.LoDTensor()
14569
tensor.set(data, place)
146-
tensor.set_lod(_convert_lod(lod))
70+
tensor.set_recursive_sequence_lengths(lod)
71+
assert tensor.has_valid_recursive_sequence_lengths(
72+
), "the provided lod info is invalid"
14773
return tensor
14874
else:
14975
raise TypeError(
@@ -181,9 +107,8 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high):
181107
A fluid LoDTensor object with tensor data and lod info.
182108
"""
183109
assert isinstance(base_shape, list), "base_shape should be a list"
184-
converted_lod = _convert_lod(lod)
185110
# append the total number of basic elements to the front of its shape
186-
overall_shape = [converted_lod[-1][-1]] + base_shape
111+
overall_shape = [sum(lod[-1])] + base_shape
187112
# the range of integer data elements is [low, high]
188113
data = np.random.random_integers(low, high, overall_shape).astype("int64")
189114
return create_lod_tensor(data, lod, place)

0 commit comments

Comments
 (0)