
Commit b5fda27

Port WarpCTC Operator (#5107)
* Add Seq2BatchFunctor, which will be used in WarpCTCOp.
* Implement WarpCTCFunctor and WarpCTCKernel.
* Add a unit test for warpctc_op.
* Modify the check_output interface in the Python unit-test framework to allow checking a subset of outputs.
* Use absolute-offset LoD in warpctc_op and the related functors.
* Refine the comments of warpctc_op.
* The new Python unit test supports checking a subset of the outputs, so revert the previous change.
* Rename the functor that transforms a LoDTensor into a Tensor of shape [max_sequence_length, num_sequences, sequence_width] to PaddingSequenceFunctor.
* Update to the newest code.
* Rename PaddingSequenceFunctor to PaddingLoDTensorFunctor and remove the computation of dimensions from the functors.
1 parent fe341ba, commit b5fda27

18 files changed: 1222 additions & 11 deletions
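For reference, the central new piece is a transform from a LoDTensor holding variable-length sequences (described by an absolute-offset LoD) to a dense Tensor of shape [max_sequence_length, num_sequences, sequence_width], zero-filling the steps past each sequence's end. The following is a minimal standalone C++ sketch of that layout, using plain std::vector instead of the framework's LoDTensor/Tensor types; the names and the main() driver are illustrative only, not the Paddle API.

// Illustration of the padded layout produced by PaddingLoDTensorFunctor
// (standalone sketch, not the framework code).
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Two sequences of lengths 3 and 1; each time step has width 2.
  // Absolute-offset LoD: rows [0,3) belong to sequence 0, rows [3,4) to sequence 1.
  std::vector<size_t> lod = {0, 3, 4};
  const size_t num_sequences = lod.size() - 1;
  const size_t sequence_width = 2;
  std::vector<float> seq = {1, 2, 3, 4, 5, 6,   // sequence 0: 3 steps
                            7, 8};              // sequence 1: 1 step

  size_t max_sequence_length = 0;
  for (size_t i = 0; i < num_sequences; ++i)
    max_sequence_length = std::max(max_sequence_length, lod[i + 1] - lod[i]);

  // Element (i, j, k) of the padded tensor lives at
  // (i * num_sequences + j) * sequence_width + k.
  std::vector<float> padding(max_sequence_length * num_sequences * sequence_width,
                             0.0f);
  for (size_t i = 0; i < max_sequence_length; ++i) {
    for (size_t j = 0; j < num_sequences; ++j) {
      const size_t length = lod[j + 1] - lod[j];
      if (i >= length) continue;  // steps past the end stay zero (the padding)
      for (size_t k = 0; k < sequence_width; ++k) {
        padding[(i * num_sequences + j) * sequence_width + k] =
            seq[(lod[j] + i) * sequence_width + k];
      }
    }
  }

  for (float v : padding) std::printf("%g ", v);  // 1 2 7 8 3 4 0 0 5 6 0 0
  std::printf("\n");
  return 0;
}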

cmake/external/warpctc.cmake

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ ExternalProject_Add(
 MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
 INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR})

-ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL)
+ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
 ADD_DEPENDENCIES(warpctc extern_warpctc)

paddle/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -151,6 +151,7 @@ op_library(lstm_op DEPS sequence2batch lstm_compute)
 op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
+op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function)
 op_library(cos_sim_op DEPS cos_sim_functor)
 op_library(parallel_do_op DEPS executor)
 # FIXME(typhoonzero): save/load depends lodtensor serialization functions

paddle/operators/conv_op.cc

Lines changed: 0 additions & 1 deletion
@@ -230,7 +230,6 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
 namespace ops = paddle::operators;
 REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad,
             ops::ConvOpGrad);
-namespace ops = paddle::operators;
 REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad,
             ops::ConvOpGrad);

paddle/operators/math/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,7 @@ if(WITH_GPU)
 nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor)
 nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function)
 nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor)
+nv_library(sequence_padding SRCS sequence_padding.cc sequence_padding.cu DEPS lod_tensor device_context)
 nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
 nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context)
 nv_library(unpooling SRCS unpooling.cc DEPS device_context)
@@ -27,6 +28,7 @@ else()
 cc_library(vol2col SRCS vol2col.cc DEPS device_context tensor)
 cc_library(context_project SRCS context_project.cc DEPS device_context math_function)
 cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor)
+cc_library(sequence_padding SRCS sequence_padding.cc DEPS lod_tensor device_context)
 cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
 cc_library(maxouting SRCS maxouting.cc DEPS device_context)
 cc_library(unpooling SRCS unpooling.cc DEPS device_context)
@@ -38,3 +40,4 @@ cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
 cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor)
 cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor)
 cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col tensor)
+cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)

paddle/operators/math/im2col_test.cc

Lines changed: 4 additions & 1 deletion
@@ -14,7 +14,6 @@ limitations under the License. */

 #include "paddle/operators/math/im2col.h"
 #include <gtest/gtest.h>
-#include <iostream>

 template <typename DeviceContext, typename Place>
 void testIm2col() {
@@ -102,6 +101,7 @@ void testIm2col() {
   Copy(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp);
   out_ocf_ptr = output_tmp.data<float>();
 }
+
 for (int i = 0; i < 6; ++i) {
   EXPECT_EQ(out_ocf_ptr[i], out_ocf_data[i]);
 }
@@ -154,6 +154,9 @@ void testIm2col() {
 for (int i = 0; i < 6; ++i) {
   EXPECT_EQ(in_ptr[i], col2im_data[i]);
 }
+
+delete place;
+delete context;
 }

 TEST(math, im2col) {
paddle/operators/math/sequence_padding.cc

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/math/sequence_padding.h"

namespace paddle {
namespace operators {
namespace math {

template <typename T>
class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& seq, framework::Tensor& padding,
                  bool norm_by_times) {
    auto lod = seq.lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The LoD of LoDTensor seq should not be null.");

    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);

    auto seq_dims = seq.dims();
    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences' lengths.");

    auto padding_dims = padding.dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");

    const size_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be the "
                      "maximum length of all sequences in LoDTensor seq.");

    const size_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be the "
                      "number of sequences in LoDTensor seq.");

    const size_t sequence_width = seq.numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");

    const T* seq_data = seq.data<T>();
    T* padding_data = padding.data<T>();
    for (size_t i = 0; i < max_sequence_length; ++i) {
      for (size_t j = 0; j < num_sequences; ++j) {
        size_t start_pos = abs_offset_lod[level][j];
        size_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
        if (i < sequence_length) {
          // i > 0 => sequence_length > 0
          T scale =
              norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
          for (size_t k = 0; k < sequence_width; ++k) {
            padding_data[(i * num_sequences + j) * sequence_width + k] =
                seq_data[(start_pos + i) * sequence_width + k] * scale;
          }
        } else {
          memset(padding_data + (i * num_sequences + j) * sequence_width, 0,
                 sequence_width * sizeof(T));
        }
      }
    }
  }
};

template <typename T>
class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  framework::LoDTensor& seq, const framework::Tensor& padding,
                  bool norm_by_times) {
    auto lod = seq.lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The LoD of LoDTensor seq should not be null.");

    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);

    auto seq_dims = seq.dims();
    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences' lengths.");

    auto padding_dims = padding.dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");

    const size_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be "
                      "the maximum length of all sequences in LoDTensor seq.");

    const size_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be "
                      "the number of sequences in LoDTensor seq.");

    const size_t sequence_width = seq.numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");

    const T* padding_data = padding.data<T>();
    T* seq_data = seq.data<T>();
    for (size_t i = 0; i < num_sequences; ++i) {
      size_t start_pos = abs_offset_lod[level][i];
      size_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
      for (size_t j = 0; j < sequence_length; ++j) {
        // sequence_width > j > 0
        T scale =
            norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
        for (size_t k = 0; k < sequence_width; ++k) {
          seq_data[(start_pos + j) * sequence_width + k] =
              padding_data[(j * num_sequences + i) * sequence_width + k] *
              scale;
        }
      }
    }
  }
};

template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;

}  // namespace math
}  // namespace operators
}  // namespace paddle
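A quick sanity check of the index arithmetic above: the unpadding functor reads the padded element at (j * num_sequences + i) * sequence_width + k back into flat LoD order, and with norm_by_times == true each copied value is additionally scaled by 1 / sequence_length. Below is a standalone sketch of the read-back direction, reusing the tiny example from the layout sketch earlier (plain std::vector, not the framework types; norm_by_times omitted).

// Illustration of the unpadding (read-back) direction of UnpaddingLoDTensorFunctor
// (standalone sketch, not the framework code).
#include <cstdio>
#include <vector>

int main() {
  // Same example as before: sequence lengths {3, 1}, sequence_width 2,
  // max_sequence_length 3, absolute-offset LoD {0, 3, 4}.
  std::vector<size_t> lod = {0, 3, 4};
  const size_t num_sequences = 2;
  const size_t sequence_width = 2;
  std::vector<float> padding = {1, 2, 7, 8,   // time step 0 of sequences 0 and 1
                                3, 4, 0, 0,   // time step 1 (sequence 1 is padding)
                                5, 6, 0, 0};  // time step 2 (sequence 1 is padding)

  std::vector<float> seq(lod.back() * sequence_width, 0.0f);
  for (size_t i = 0; i < num_sequences; ++i) {
    const size_t start = lod[i];
    const size_t length = lod[i + 1] - start;
    for (size_t j = 0; j < length; ++j) {  // padded steps beyond length are ignored
      for (size_t k = 0; k < sequence_width; ++k) {
        seq[(start + j) * sequence_width + k] =
            padding[(j * num_sequences + i) * sequence_width + k];
      }
    }
  }

  for (float v : seq) std::printf("%g ", v);  // 1 2 3 4 5 6 7 8
  std::printf("\n");
  return 0;
}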
