Skip to content

Commit 44561a2

Browse files
Merge pull request #7655 from wanghaoshuang/ctc_evaluator_py
Add python wrapper for CTC greedy decoder and edit distance evaluator
2 parents b156bbc + d9d9be1 commit 44561a2

File tree

8 files changed

+233
-6
lines changed

8 files changed

+233
-6
lines changed

doc/api/v2/fluid/layers.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,16 @@ swish
500500
.. autofunction:: paddle.v2.fluid.layers.swish
501501
:noindex:
502502

503+
edit_distance
504+
---------------
505+
.. autofunction:: paddle.v2.fluid.layers.edit_distance
506+
:noindex:
507+
508+
ctc_greedy_decoder
509+
---------------
510+
.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder
511+
:noindex:
512+
503513
l2_normalize
504514
------------
505515
.. autofunction:: paddle.v2.fluid.layers.l2_normalize

paddle/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ op_library(parallel_do_op DEPS executor)
156156
# Register multiple kernels to pybind
157157
if (WITH_GPU)
158158
op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
159+
op_library(edit_distance_op SRCS edit_distance_op.cc edit_distance_op.cu DEPS math_function)
159160
op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
160161
op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
161162
conv_transpose_cudnn_op.cu.cc DEPS vol2col)

paddle/operators/edit_distance_op.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class EditDistanceOp : public framework::OperatorWithKernel {
2525
PADDLE_ENFORCE(ctx->HasInput("Hyps"), "Input(Hyps) shouldn't be null.");
2626
PADDLE_ENFORCE(ctx->HasInput("Refs"), "Input(Refs) shouldn't be null.");
2727
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) shouldn't be null.");
28+
PADDLE_ENFORCE(ctx->HasOutput("SequenceNum"),
29+
"Output(SequenceNum) shouldn't be null.");
2830
auto hyp_dims = ctx->GetInputDim("Hyps");
2931
auto ref_dims = ctx->GetInputDim("Refs");
3032
PADDLE_ENFORCE(hyp_dims.size() == 2 && hyp_dims[1] == 1,
@@ -34,6 +36,7 @@ class EditDistanceOp : public framework::OperatorWithKernel {
3436
"Input(Refs) must be a 2-D LoDTensor with the 2nd dimension "
3537
"equal to 1.");
3638
ctx->SetOutputDim("Out", ctx->GetInputDim("Refs"));
39+
ctx->SetOutputDim("SequenceNum", {1});
3740
}
3841

3942
protected:
@@ -54,6 +57,7 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
5457
AddInput("Refs",
5558
"(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
5659
"The indices for reference strings.");
60+
AddOutput("SequenceNum", "The sequence count of current batch");
5761
AddAttr<bool>("normalized",
5862
"(bool, default false) Indicated whether to normalize "
5963
"the edit distance by the length of reference string.")

paddle/operators/edit_distance_op.cu

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License. */
1414

1515
#include <algorithm>
1616
#include "paddle/framework/op_registry.h"
17+
#include "paddle/operators/math/math_function.h"
1718
#include "paddle/platform/cuda_helper.h"
1819
#include "paddle/platform/gpu_info.h"
1920

@@ -72,6 +73,8 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
7273

7374
auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
7475
auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
76+
auto* sequence_num = ctx.Output<framework::Tensor>("SequenceNum");
77+
sequence_num->mutable_data<int64_t>(ctx.GetPlace());
7578

7679
auto normalized = ctx.Attr<bool>("normalized");
7780
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
@@ -88,7 +91,11 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
8891
"Reference string %d is empty.", i);
8992
}
9093

91-
auto num_strs = hyp_lod.size() - 1;
94+
const size_t num_strs = hyp_lod.size() - 1;
95+
math::SetConstant<platform::CUDADeviceContext, int64_t> set_constant;
96+
set_constant(ctx.template device_context<platform::CUDADeviceContext>(),
97+
sequence_num, static_cast<int64_t>(num_strs));
98+
9299
out_t->Resize({static_cast<int64_t>(num_strs), 1});
93100
out_t->mutable_data<T>(ctx.GetPlace());
94101
auto out = out_t->data<T>();

paddle/operators/edit_distance_op.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ limitations under the License. */
1616
#include <algorithm>
1717
#include "paddle/framework/eigen.h"
1818
#include "paddle/framework/op_registry.h"
19-
2019
namespace paddle {
2120
namespace operators {
2221

@@ -28,6 +27,8 @@ class EditDistanceKernel : public framework::OpKernel<T> {
2827

2928
auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
3029
auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
30+
auto* sequence_num = ctx.Output<framework::Tensor>("SequenceNum");
31+
int64_t* seq_num_data = sequence_num->mutable_data<int64_t>(ctx.GetPlace());
3132

3233
auto normalized = ctx.Attr<bool>("normalized");
3334

@@ -41,6 +42,7 @@ class EditDistanceKernel : public framework::OpKernel<T> {
4142
"Reference string %d is empty.", i);
4243
}
4344
auto num_strs = hyp_lod.size() - 1;
45+
*seq_num_data = static_cast<int64_t>(num_strs);
4446

4547
out_t->Resize({static_cast<int64_t>(num_strs), 1});
4648
out_t->mutable_data<float>(ctx.GetPlace());

python/paddle/v2/fluid/evaluator.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,3 +205,63 @@ def eval(self, executor, eval_program=None):
205205
[precision], dtype='float32'), np.array(
206206
[recall], dtype='float32'), np.array(
207207
[f1_score], dtype='float32')
208+
209+
210+
class EditDistance(Evaluator):
211+
"""
212+
Accumulate edit distance sum and sequence number from mini-batches and
213+
compute the average edit_distance of all batches.
214+
215+
Args:
216+
input: the sequences predicted by network.
217+
label: the target sequences, which must have the same sequence count
218+
with input.
219+
ignored_tokens(list of int): Tokens that should be removed before
220+
calculating edit distance.
221+
222+
Example:
223+
224+
exe = fluid.Executor(place)
225+
distance_evaluator = fluid.evaluator.EditDistance(input, label)
226+
for epoch in PASS_NUM:
227+
distance_evaluator.reset(exe)
228+
for data in batches:
229+
loss, sum_distance = exe.run(fetch_list=[cost] + distance_evaluator.metrics)
230+
avg_distance = distance_evaluator.eval(exe)
231+
pass_distance = distance_evaluator.eval(exe)
232+
233+
In the above example:
234+
'sum_distance' is the sum of the batch's edit distance.
235+
'avg_distance' is the average of edit distance from the first batch to the current batch.
236+
'pass_distance' is the average of edit distance over the whole pass.
237+
238+
"""
239+
240+
def __init__(self, input, label, ignored_tokens=None, **kwargs):
241+
super(EditDistance, self).__init__("edit_distance", **kwargs)
242+
main_program = self.helper.main_program
243+
if main_program.current_block().idx != 0:
244+
raise ValueError("You can only invoke Evaluator in root block")
245+
246+
self.total_error = self.create_state(
247+
dtype='float32', shape=[1], suffix='total_error')
248+
self.seq_num = self.create_state(
249+
dtype='int64', shape=[1], suffix='seq_num')
250+
error, seq_num = layers.edit_distance(
251+
input=input, label=label, ignored_tokens=ignored_tokens)
252+
#error = layers.cast(x=error, dtype='float32')
253+
sum_error = layers.reduce_sum(error)
254+
layers.sums(input=[self.total_error, sum_error], out=self.total_error)
255+
layers.sums(input=[self.seq_num, seq_num], out=self.seq_num)
256+
self.metrics.append(sum_error)
257+
258+
def eval(self, executor, eval_program=None):
259+
if eval_program is None:
260+
eval_program = Program()
261+
block = eval_program.current_block()
262+
with program_guard(main_program=eval_program):
263+
total_error = _clone_var_(block, self.total_error)
264+
seq_num = _clone_var_(block, self.seq_num)
265+
seq_num = layers.cast(x=seq_num, dtype='float32')
266+
out = layers.elementwise_div(x=total_error, y=seq_num)
267+
return np.array(executor.run(eval_program, fetch_list=[out])[0])

python/paddle/v2/fluid/layers/nn.py

Lines changed: 143 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
2929
'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
3030
'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
31-
'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape'
31+
'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'warpctc',
32+
'sequence_reshape'
3233
]
3334

3435

@@ -1866,6 +1867,146 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
18661867
return out
18671868

18681869

1870+
def edit_distance(input,
1871+
label,
1872+
normalized=False,
1873+
ignored_tokens=None,
1874+
name=None):
1875+
"""
1876+
EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called Levenshtein distance, measures how dissimilar two strings are by counting the minimum number of operations to transform one string into another. Here the operations include insertion, deletion, and substitution. For example, given hypothesis string A = "kitten" and reference B = "sitting", the edit distance is 3, because transforming A into B takes at least two substitutions and one insertion:
1877+
1878+
"kitten" -> "sitten" -> "sittin" -> "sitting"
1879+
1880+
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total number denoted by `batch_size`, and the separation is specified by the LoD information. And the `batch_size` reference strings are arranged in order in the same way in the LoDTensor Input(Refs).
1881+
1882+
Output(Out) contains the `batch_size` results and each stands for the edit distance for a pair of strings respectively. If Attr(normalized) is true, the edit distance will be divided by the length of reference string.
1883+
1884+
Args:
1885+
1886+
input(Variable): The indices for hypothesis strings.
1887+
1888+
label(Variable): The indices for reference strings.
1889+
1890+
normalized(bool): Indicates whether to normalize the edit distance by the length of reference string.
1891+
1892+
ignored_tokens(list of int): Tokens that should be removed before calculating edit distance.
1893+
1894+
Returns:
1895+
tuple(Variable, Variable): the sequence-to-sequence edit distances in shape [batch_size, 1], and the sequence count of the current batch in shape [1].
1896+
1897+
Examples:
1898+
.. code-block:: python
1899+
1900+
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
1901+
y = fluid.layers.data(name='y', shape=[7], dtype='float32')
1902+
1903+
cost, seq_num = fluid.layers.edit_distance(input=x, label=y)
1904+
"""
1905+
helper = LayerHelper("edit_distance", **locals())
1906+
1907+
# remove some tokens from input and labels
1908+
if ignored_tokens is not None and len(ignored_tokens) > 0:
1909+
erased_input = helper.create_tmp_variable(dtype="int64")
1910+
erased_label = helper.create_tmp_variable(dtype="int64")
1911+
1912+
helper.append_op(
1913+
type="sequence_erase",
1914+
inputs={"X": [input]},
1915+
outputs={"Out": [erased_input]},
1916+
attrs={"tokens": ignored_tokens})
1917+
input = erased_input
1918+
1919+
helper.append_op(
1920+
type="sequence_erase",
1921+
inputs={"X": [label]},
1922+
outputs={"Out": [erased_label]},
1923+
attrs={"tokens": ignored_tokens})
1924+
label = erased_label
1925+
1926+
# edit distance op
1927+
edit_distance_out = helper.create_tmp_variable(dtype="int64")
1928+
sequence_num = helper.create_tmp_variable(dtype="int64")
1929+
helper.append_op(
1930+
type="edit_distance",
1931+
inputs={"Hyps": [input],
1932+
"Refs": [label]},
1933+
outputs={"Out": [edit_distance_out],
1934+
"SequenceNum": [sequence_num]},
1935+
attrs={"normalized": normalized})
1936+
1937+
return edit_distance_out, sequence_num
1938+
1939+
1940+
def ctc_greedy_decoder(input, blank, name=None):
1941+
"""
1942+
This op is used to decode sequences by greedy policy by below steps:
1943+
1. Get the indexes of max value for each row in input, a.k.a. numpy.argmax(input, axis=1).
1944+
2. For each sequence in result of step1, merge repeated tokens between two blanks and delete all blanks.
1945+
1946+
A simple example as below:
1947+
1948+
.. code-block:: text
1949+
1950+
Given:
1951+
1952+
input.data = [[0.6, 0.1, 0.3, 0.1],
1953+
[0.3, 0.2, 0.4, 0.1],
1954+
[0.1, 0.5, 0.1, 0.3],
1955+
[0.5, 0.1, 0.3, 0.1],
1956+
1957+
[0.5, 0.1, 0.3, 0.1],
1958+
[0.2, 0.2, 0.2, 0.4],
1959+
[0.2, 0.2, 0.1, 0.5],
1960+
[0.5, 0.1, 0.3, 0.1]]
1961+
1962+
input.lod = [[0, 4, 8]]
1963+
1964+
Then:
1965+
1966+
output.data = [[2],
1967+
[1],
1968+
[3]]
1969+
1970+
output.lod = [[0, 2, 3]]
1971+
1972+
Args:
1973+
1974+
input(Variable): (LoDTensor<float>), the probabilities of variable-length sequences, which is a 2-D Tensor with LoD information. Its shape is [Lp, num_classes + 1], where Lp is the sum of all input sequences' length and num_classes is the true number of classes (not including the blank label).
1975+
1976+
blank(int): the blank label index of Connectionist Temporal Classification (CTC) loss, which is in the half-opened interval [0, num_classes + 1).
1977+
1978+
Returns:
1979+
Variable: CTC greedy decode result.
1980+
1981+
Examples:
1982+
.. code-block:: python
1983+
1984+
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
1985+
1986+
cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
1987+
"""
1988+
helper = LayerHelper("ctc_greedy_decoder", **locals())
1989+
# top 1 op
1990+
topk_out = helper.create_tmp_variable(dtype=input.dtype)
1991+
topk_indices = helper.create_tmp_variable(dtype="int64")
1992+
helper.append_op(
1993+
type="top_k",
1994+
inputs={"X": [input]},
1995+
outputs={"Out": [topk_out],
1996+
"Indices": [topk_indices]},
1997+
attrs={"k": 1})
1998+
1999+
# ctc align op
2000+
ctc_out = helper.create_tmp_variable(dtype="int64")
2001+
helper.append_op(
2002+
type="ctc_align",
2003+
inputs={"Input": [topk_indices]},
2004+
outputs={"Output": [ctc_out]},
2005+
attrs={"merge_repeated": True,
2006+
"blank": blank})
2007+
return ctc_out
2008+
2009+
18692010
def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
18702011
"""
18712012
An operator integrating the open source Warp-CTC library
@@ -1890,7 +2031,7 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
18902031
Temporal Classification (CTC) loss, which is in the
18912032
half-opened interval [0, num_classes + 1).
18922033
norm_by_times: (bool, default: false), whether to normalize
1893-
the gradients by the number of time-step,which is also the
2034+
the gradients by the number of time-step, which is also the
18942035
sequence's length. There is no need to normalize the gradients
18952036
if warpctc layer was followed by a mean_op.
18962037

python/paddle/v2/fluid/tests/test_edit_distance_op.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def setUp(self):
6161

6262
num_strs = len(x1_lod) - 1
6363
distance = np.zeros((num_strs, 1)).astype("float32")
64+
sequence_num = np.array(2).astype("int64")
6465
for i in range(0, num_strs):
6566
distance[i] = Levenshtein(
6667
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -70,7 +71,7 @@ def setUp(self):
7071
distance[i] = distance[i] / len_ref
7172
self.attrs = {'normalized': normalized}
7273
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
73-
self.outputs = {'Out': distance}
74+
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
7475

7576
def test_check_output(self):
7677
self.check_output()
@@ -89,6 +90,7 @@ def setUp(self):
8990

9091
num_strs = len(x1_lod) - 1
9192
distance = np.zeros((num_strs, 1)).astype("float32")
93+
sequence_num = np.array(3).astype("int64")
9294
for i in range(0, num_strs):
9395
distance[i] = Levenshtein(
9496
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -98,7 +100,7 @@ def setUp(self):
98100
distance[i] = distance[i] / len_ref
99101
self.attrs = {'normalized': normalized}
100102
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
101-
self.outputs = {'Out': distance}
103+
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
102104

103105
def test_check_output(self):
104106
self.check_output()

0 commit comments

Comments
 (0)