Skip to content

Commit 64606ea

Browse files
committed
Merge branch 'develop' into fix_rendering_error_of_transpose_op
2 parents b3de0d9 + 44561a2 commit 64606ea

File tree

8 files changed

+234
-5
lines changed

8 files changed

+234
-5
lines changed

doc/api/v2/fluid/layers.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,16 @@ swish
500500
.. autofunction:: paddle.v2.fluid.layers.swish
501501
:noindex:
502502

503+
edit_distance
504+
---------------
505+
.. autofunction:: paddle.v2.fluid.layers.edit_distance
506+
:noindex:
507+
508+
ctc_greedy_decoder
509+
------------------
510+
.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder
511+
:noindex:
512+
503513
l2_normalize
504514
------------
505515
.. autofunction:: paddle.v2.fluid.layers.l2_normalize

paddle/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ op_library(parallel_do_op DEPS executor)
156156
# Regist multiple Kernel to pybind
157157
if (WITH_GPU)
158158
op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
159+
op_library(edit_distance_op SRCS edit_distance_op.cc edit_distance_op.cu DEPS math_function)
159160
op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
160161
op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
161162
conv_transpose_cudnn_op.cu.cc DEPS vol2col)

paddle/operators/edit_distance_op.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class EditDistanceOp : public framework::OperatorWithKernel {
2525
PADDLE_ENFORCE(ctx->HasInput("Hyps"), "Input(Hyps) shouldn't be null.");
2626
PADDLE_ENFORCE(ctx->HasInput("Refs"), "Input(Refs) shouldn't be null.");
2727
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) shouldn't be null.");
28+
PADDLE_ENFORCE(ctx->HasOutput("SequenceNum"),
29+
"Output(SequenceNum) shouldn't be null.");
2830
auto hyp_dims = ctx->GetInputDim("Hyps");
2931
auto ref_dims = ctx->GetInputDim("Refs");
3032
PADDLE_ENFORCE(hyp_dims.size() == 2 && hyp_dims[1] == 1,
@@ -34,6 +36,7 @@ class EditDistanceOp : public framework::OperatorWithKernel {
3436
"Input(Refs) must be a 2-D LoDTensor with the 2nd dimension "
3537
"equal to 1.");
3638
ctx->SetOutputDim("Out", ctx->GetInputDim("Refs"));
39+
ctx->SetOutputDim("SequenceNum", {1});
3740
}
3841

3942
protected:
@@ -54,6 +57,7 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
5457
AddInput("Refs",
5558
"(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
5659
"The indices for reference strings.");
60+
AddOutput("SequenceNum", "The sequence count of current batch");
5761
AddAttr<bool>("normalized",
5862
"(bool, default false) Indicated whether to normalize "
5963
"the edit distance by the length of reference string.")

paddle/operators/edit_distance_op.cu

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License. */
1414

1515
#include <algorithm>
1616
#include "paddle/framework/op_registry.h"
17+
#include "paddle/operators/math/math_function.h"
1718
#include "paddle/platform/cuda_helper.h"
1819
#include "paddle/platform/gpu_info.h"
1920

@@ -72,6 +73,8 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
7273

7374
auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
7475
auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
76+
auto* sequence_num = ctx.Output<framework::Tensor>("SequenceNum");
77+
sequence_num->mutable_data<int64_t>(ctx.GetPlace());
7578

7679
auto normalized = ctx.Attr<bool>("normalized");
7780
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
@@ -88,7 +91,11 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
8891
"Reference string %d is empty.", i);
8992
}
9093

91-
auto num_strs = hyp_lod.size() - 1;
94+
const size_t num_strs = hyp_lod.size() - 1;
95+
math::SetConstant<platform::CUDADeviceContext, int64_t> set_constant;
96+
set_constant(ctx.template device_context<platform::CUDADeviceContext>(),
97+
sequence_num, static_cast<int64_t>(num_strs));
98+
9299
out_t->Resize({static_cast<int64_t>(num_strs), 1});
93100
out_t->mutable_data<T>(ctx.GetPlace());
94101
auto out = out_t->data<T>();

paddle/operators/edit_distance_op.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ limitations under the License. */
1616
#include <algorithm>
1717
#include "paddle/framework/eigen.h"
1818
#include "paddle/framework/op_registry.h"
19-
2019
namespace paddle {
2120
namespace operators {
2221

@@ -28,6 +27,8 @@ class EditDistanceKernel : public framework::OpKernel<T> {
2827

2928
auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
3029
auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
30+
auto* sequence_num = ctx.Output<framework::Tensor>("SequenceNum");
31+
int64_t* seq_num_data = sequence_num->mutable_data<int64_t>(ctx.GetPlace());
3132

3233
auto normalized = ctx.Attr<bool>("normalized");
3334

@@ -41,6 +42,7 @@ class EditDistanceKernel : public framework::OpKernel<T> {
4142
"Reference string %d is empty.", i);
4243
}
4344
auto num_strs = hyp_lod.size() - 1;
45+
*seq_num_data = static_cast<int64_t>(num_strs);
4446

4547
out_t->Resize({static_cast<int64_t>(num_strs), 1});
4648
out_t->mutable_data<float>(ctx.GetPlace());

python/paddle/v2/fluid/evaluator.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,3 +205,63 @@ def eval(self, executor, eval_program=None):
205205
[precision], dtype='float32'), np.array(
206206
[recall], dtype='float32'), np.array(
207207
[f1_score], dtype='float32')
208+
209+
210+
class EditDistance(Evaluator):
    """
    Accumulate the edit distance sum and the sequence count over mini-batches
    and compute the average edit distance across all accumulated batches.

    Args:
        input: the sequences predicted by the network.
        label: the target sequences, which must have the same sequence count
            as input.
        ignored_tokens(list of int): Tokens that should be removed before
            calculating edit distance.

    Example:

        exe = fluid.executor(place)
        distance_evaluator = fluid.evaluator.EditDistance(input, label)
        for epoch in PASS_NUM:
            distance_evaluator.reset(exe)
            for data in batches:
                loss, sum_distance = exe.run(fetch_list=[cost] + distance_evaluator.metrics)
                avg_distance = distance_evaluator.eval(exe)
            pass_distance = distance_evaluator.eval(exe)

        In the above example:
        'sum_distance' is the sum of the current batch's edit distances.
        'avg_distance' is the average edit distance from the first batch up to
        the current batch.
        'pass_distance' is the average edit distance over the whole pass.
    """

    def __init__(self, input, label, ignored_tokens=None, **kwargs):
        super(EditDistance, self).__init__("edit_distance", **kwargs)
        # The evaluator's ops may only be appended to the root block (idx 0).
        if self.helper.main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # State variables holding the running totals across mini-batches.
        self.total_error = self.create_state(
            dtype='float32', shape=[1], suffix='total_error')
        self.seq_num = self.create_state(
            dtype='int64', shape=[1], suffix='seq_num')

        # Per-batch distances and per-batch sequence count.
        batch_distances, batch_seq_num = layers.edit_distance(
            input=input, label=label, ignored_tokens=ignored_tokens)
        batch_error = layers.reduce_sum(batch_distances)

        # Fold the batch values into the running totals in place.
        layers.sums(
            input=[self.total_error, batch_error], out=self.total_error)
        layers.sums(input=[self.seq_num, batch_seq_num], out=self.seq_num)

        # Only the per-batch distance sum is exposed as a fetchable metric.
        self.metrics.append(batch_error)

    def eval(self, executor, eval_program=None):
        """
        Run a small program that divides the accumulated edit distance by the
        accumulated sequence count and return the result as a numpy array.

        Args:
            executor: the executor used to run the evaluation program.
            eval_program: program to build the evaluation ops in; a new
                Program is created when it is None.
        """
        if eval_program is None:
            eval_program = Program()
        target_block = eval_program.current_block()
        with program_guard(main_program=eval_program):
            accumulated_error = _clone_var_(target_block, self.total_error)
            accumulated_count = _clone_var_(target_block, self.seq_num)
            # The count is stored as int64; cast it so it can divide the
            # float32 error total.
            count_as_float = layers.cast(x=accumulated_count, dtype='float32')
            average = layers.elementwise_div(
                x=accumulated_error, y=count_as_float)
            fetched = executor.run(eval_program, fetch_list=[average])
            return np.array(fetched[0])

python/paddle/v2/fluid/layers/nn.py

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,13 @@
5050
'sequence_last_step',
5151
'dropout',
5252
'split',
53+
'ctc_greedy_decoder',
54+
'edit_distance',
5355
'l2_normalize',
5456
'matmul',
5557
'warpctc',
5658
'sequence_reshape',
59+
'transpose',
5760
]
5861

5962

@@ -1891,6 +1894,146 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
18911894
return out
18921895

18931896

1897+
def edit_distance(input,
                  label,
                  normalized=False,
                  ignored_tokens=None,
                  name=None):
    """
    EditDistance operator computes the edit distances between a batch of
    hypothesis strings and their references. Edit distance, also called
    Levenshtein distance, measures how dissimilar two strings are by counting
    the minimum number of operations needed to transform one string into
    another. Here the operations include insertion, deletion, and
    substitution. For example, given hypothesis string A = "kitten" and
    reference B = "sitting", the edit distance is 3, because A is transformed
    into B with at least two substitutions and one insertion:

       "kitten" -> "sitten" -> "sittin" -> "sitting"

    Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with
    the total number denoted by `batch_size`, and the separation is specified
    by the LoD information. The `batch_size` reference strings are arranged
    in order in the same way in the LoDTensor Input(Refs).

    Output(Out) contains the `batch_size` results and each stands for the
    edit distance for a pair of strings respectively. If Attr(normalized) is
    true, the edit distance will be divided by the length of the reference
    string.

    Args:

        input(Variable): The indices for hypothesis strings.

        label(Variable): The indices for reference strings.

        normalized(bool): Indicates whether to normalize the edit distance by
            the length of the reference string.

        ignored_tokens(list of int): Tokens that should be removed before
            calculating edit distance. When non-empty, a `sequence_erase` op
            is applied to both input and label first.

        name(str): Not used directly in this function; it is forwarded to
            LayerHelper through `**locals()`.

    Returns:
        tuple: A pair `(edit_distance_out, sequence_num)`.
        `edit_distance_out` is the sequence-to-sequence edit distance in
        shape [batch_size, 1]; `sequence_num` is the sequence count of the
        current batch in shape [1].

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[8], dtype='float32')
            y = fluid.layers.data(name='y', shape=[7], dtype='float32')

            cost, seq_num = fluid.layers.edit_distance(input=x, label=y)
    """
    # NOTE: must stay the first statement so that locals() only contains the
    # function arguments.
    helper = LayerHelper("edit_distance", **locals())

    # remove some tokens from input and labels
    if ignored_tokens is not None and len(ignored_tokens) > 0:
        erased_input = helper.create_tmp_variable(dtype="int64")
        erased_label = helper.create_tmp_variable(dtype="int64")

        helper.append_op(
            type="sequence_erase",
            inputs={"X": [input]},
            outputs={"Out": [erased_input]},
            attrs={"tokens": ignored_tokens})
        input = erased_input

        # Fixed: the output previously referenced the undefined name
        # `erase_label`, raising NameError whenever tokens were ignored.
        helper.append_op(
            type="sequence_erase",
            inputs={"X": [label]},
            outputs={"Out": [erased_label]},
            attrs={"tokens": ignored_tokens})
        label = erased_label

    # edit distance op
    edit_distance_out = helper.create_tmp_variable(dtype="int64")
    sequence_num = helper.create_tmp_variable(dtype="int64")
    helper.append_op(
        type="edit_distance",
        inputs={"Hyps": [input],
                "Refs": [label]},
        outputs={"Out": [edit_distance_out],
                 "SequenceNum": [sequence_num]},
        attrs={"normalized": normalized})

    return edit_distance_out, sequence_num
1965+
1966+
1967+
def ctc_greedy_decoder(input, blank, name=None):
    """
    Decode sequences with a greedy policy in two steps:

    1. Take the index of the maximum value in each row of input, i.e.
       numpy.argmax(input, axis=1).
    2. For each sequence in the result of step 1, merge repeated tokens
       between two blanks and delete all blanks.

    A simple example as below:

    .. code-block:: text

        Given:

        input.data = [[0.6, 0.1, 0.3, 0.1],
                      [0.3, 0.2, 0.4, 0.1],
                      [0.1, 0.5, 0.1, 0.3],
                      [0.5, 0.1, 0.3, 0.1],

                      [0.5, 0.1, 0.3, 0.1],
                      [0.2, 0.2, 0.2, 0.4],
                      [0.2, 0.2, 0.1, 0.5],
                      [0.5, 0.1, 0.3, 0.1]]

        input.lod = [[0, 4, 8]]

        Then:

        output.data = [[2],
                       [1],
                       [3]]

        output.lod = [[0, 2, 3]]

    Args:

        input(Variable): (LoDTensor<float>), the probabilities of
            variable-length sequences, which is a 2-D Tensor with LoD
            information. Its shape is [Lp, num_classes + 1], where Lp is the
            sum of all input sequences' lengths and num_classes is the true
            number of classes (not including the blank label).

        blank(int): the blank label index of Connectionist Temporal
            Classification (CTC) loss, which is in the half-opened interval
            [0, num_classes + 1).

        name(str): Not used directly in this function; it is forwarded to
            LayerHelper through `**locals()`.

    Returns:
        Variable: CTC greedy decode result.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[8], dtype='float32')

            cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
    """
    # Must remain the first statement: locals() is expected to hold only the
    # function arguments at this point.
    helper = LayerHelper("ctc_greedy_decoder", **locals())

    # Step 1: top-1 selection over the input.
    best_scores = helper.create_tmp_variable(dtype=input.dtype)
    best_indices = helper.create_tmp_variable(dtype="int64")
    top1_outputs = {"Out": [best_scores], "Indices": [best_indices]}
    helper.append_op(
        type="top_k",
        inputs={"X": [input]},
        outputs=top1_outputs,
        attrs={"k": 1})

    # Step 2: CTC alignment with repeated-token merging enabled and the
    # given blank index.
    decoded = helper.create_tmp_variable(dtype="int64")
    align_attrs = {"merge_repeated": True, "blank": blank}
    helper.append_op(
        type="ctc_align",
        inputs={"Input": [best_indices]},
        outputs={"Output": [decoded]},
        attrs=align_attrs)
    return decoded
2035+
2036+
18942037
def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
18952038
"""
18962039
An operator integrating the open source Warp-CTC library
@@ -1915,7 +2058,7 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
19152058
Temporal Classification (CTC) loss, which is in the
19162059
half-opened interval [0, num_classes + 1).
19172060
norm_by_times: (bool, default: false), whether to normalize
1918-
the gradients by the number of time-step,which is also the
2061+
the gradients by the number of time-step, which is also the
19192062
sequence's length. There is no need to normalize the gradients
19202063
if warpctc layer was followed by a mean_op.
19212064

python/paddle/v2/fluid/tests/test_edit_distance_op.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def setUp(self):
6161

6262
num_strs = len(x1_lod) - 1
6363
distance = np.zeros((num_strs, 1)).astype("float32")
64+
sequence_num = np.array(2).astype("int64")
6465
for i in range(0, num_strs):
6566
distance[i] = Levenshtein(
6667
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -70,7 +71,7 @@ def setUp(self):
7071
distance[i] = distance[i] / len_ref
7172
self.attrs = {'normalized': normalized}
7273
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
73-
self.outputs = {'Out': distance}
74+
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
7475

7576
def test_check_output(self):
7677
self.check_output()
@@ -89,6 +90,7 @@ def setUp(self):
8990

9091
num_strs = len(x1_lod) - 1
9192
distance = np.zeros((num_strs, 1)).astype("float32")
93+
sequence_num = np.array(3).astype("int64")
9294
for i in range(0, num_strs):
9395
distance[i] = Levenshtein(
9496
hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -98,7 +100,7 @@ def setUp(self):
98100
distance[i] = distance[i] / len_ref
99101
self.attrs = {'normalized': normalized}
100102
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
101-
self.outputs = {'Out': distance}
103+
self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
102104

103105
def test_check_output(self):
104106
self.check_output()

0 commit comments

Comments
 (0)