Skip to content

Commit 8143a42

Browse files
committed
1. Add more comments
1 parent 1bc8de3 commit 8143a42

File tree

2 files changed

+41
-11
lines changed

2 files changed

+41
-11
lines changed

python/paddle/v2/fluid/evaluator.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -208,20 +208,46 @@ def eval(self, executor, eval_program=None):
208208

209209
class EditDistance(Evaluator):
210210
"""
211-
Average edit distance error for multiple mini-batches.
211+
Accumulate edit distance sum and sequence number from mini-batches and
212+
compute the average edit_distance of all batches.
213+
214+
Args:
215+
input: the sequences predicted by network
216+
label: the target sequences which must have the same sequence count
217+
with input.
218+
ignored_tokens(list of int): Tokens that should be removed before
219+
calculating edit distance.
220+
221+
Example:
222+
223+
exe = fluid.Executor(place)
224+
distance_evaluator = fluid.evaluator.EditDistance(input, label)
225+
for epoch in range(PASS_NUM):
226+
distance_evaluator.reset(exe)
227+
for data in batches:
228+
loss, sum_distance = exe.run(fetch_list=[cost] + distance_evaluator.metrics)
229+
avg_distance = distance_evaluator.eval(exe)
230+
pass_distance = distance_evaluator.eval(exe)
231+
232+
In the above example:
233+
'sum_distance' is the sum of the batch's edit distance.
234+
'avg_distance' is the average edit distance from the first batch to the current batch.
235+
'pass_distance' is the average edit distance over all batches in the pass.
236+
212237
"""
213238

214-
def __init__(self, input, label, k=1, **kwargs):
239+
def __init__(self, input, label, ignored_tokens=None, **kwargs):
215240
super(EditDistance, self).__init__("edit_distance", **kwargs)
216241
main_program = self.helper.main_program
217242
if main_program.current_block().idx != 0:
218243
raise ValueError("You can only invoke Evaluator in root block")
219244

220245
self.total_error = self.create_state(
221-
dtype='float32', shape=[1], suffix='total')
246+
dtype='float32', shape=[1], suffix='total_error')
222247
self.seq_num = self.create_state(
223-
dtype='int64', shape=[1], suffix='total')
224-
error, seq_num = layers.edit_distance(input=input, label=label)
248+
dtype='int64', shape=[1], suffix='seq_num')
249+
error, seq_num = layers.edit_distance(
250+
input=input, label=label, ignored_tokens=ignored_tokens)
225251
#error = layers.cast(x=error, dtype='float32')
226252
sum_error = layers.reduce_sum(error)
227253
layers.sums(input=[self.total_error, sum_error], out=self.total_error)

python/paddle/v2/fluid/layers/nn.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,7 +1864,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
18641864
return out
18651865

18661866

1867-
def edit_distance(input, label, normalized=False, tokens=None, name=None):
1867+
def edit_distance(input,
1868+
label,
1869+
normalized=False,
1870+
ignored_tokens=None,
1871+
name=None):
18681872
"""
18691873
EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called Levenshtein distance, measures how dissimilar two strings are by counting the minimum number of operations to transform one string into another. Here the operations include insertion, deletion, and substitution. For example, given hypothesis string A = "kitten" and reference B = "sitting", the edit distance is 3, because transforming A into B requires at least two substitutions and one insertion:
18701874
@@ -1882,10 +1886,10 @@ def edit_distance(input, label, normalized=False, tokens=None, name=None):
18821886
18831887
normalized(bool): Indicates whether to normalize the edit distance by the length of reference string.
18841888
1885-
tokens(list): Tokens that should be removed before calculating edit distance.
1889+
ignored_tokens(list of int): Tokens that should be removed before calculating edit distance.
18861890
18871891
Returns:
1888-
Variable: sequence-to-sequence edit distance loss in shape [batch_size, 1].
1892+
Variable: sequence-to-sequence edit distance in shape [batch_size, 1].
18891893
18901894
Examples:
18911895
.. code-block:: python
@@ -1898,22 +1902,22 @@ def edit_distance(input, label, normalized=False, tokens=None, name=None):
18981902
helper = LayerHelper("edit_distance", **locals())
18991903

19001904
# remove some tokens from input and labels
1901-
if tokens is not None and len(tokens) > 0:
1905+
if ignored_tokens is not None and len(ignored_tokens) > 0:
19021906
erased_input = helper.create_tmp_variable(dtype="int64")
19031907
erased_label = helper.create_tmp_variable(dtype="int64")
19041908

19051909
helper.append_op(
19061910
type="sequence_erase",
19071911
inputs={"X": [input]},
19081912
outputs={"Out": [erased_input]},
1909-
attrs={"tokens": tokens})
1913+
attrs={"tokens": ignored_tokens})
19101914
input = erased_input
19111915

19121916
helper.append_op(
19131917
type="sequence_erase",
19141918
inputs={"X": [label]},
19151919
outputs={"Out": [erase_label]},
1916-
attrs={"tokens": tokens})
1920+
attrs={"tokens": ignored_tokens})
19171921
label = erased_label
19181922

19191923
# edit distance op

0 commit comments

Comments
 (0)