Skip to content

Commit 55ec0e2

Browse files
Merge pull request #7649 from wanghaoshuang/fix_edit
Change input data type to int64_t
2 parents 0071b5f + 6e04e58 commit 55ec0e2

File tree

4 files changed

+23
-23
lines changed

4 files changed

+23
-23
lines changed

paddle/operators/edit_distance_op.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
4949
EditDistanceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
5050
: OpProtoAndCheckerMaker(proto, op_checker) {
5151
AddInput("Hyps",
52-
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
52+
"(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
5353
"The indices for hypothesis strings.");
5454
AddInput("Refs",
55-
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
55+
"(2-D LoDTensor<int64_t>, 2nd dim. equal to 1) "
5656
"The indices for reference strings.");
5757
AddAttr<bool>("normalized",
5858
"(bool, default false) Indicated whether to normalize "
@@ -66,22 +66,22 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
6666
EditDistance operator computes the edit distances between a batch of hypothesis
6767
strings and their references.
6868
69-
Edit distance, also called Levenshtein distance, measures how dissimilar two strings
70-
are by counting the minimum number of operations to transform one string into anthor.
71-
Here the operations include insertion, deletion, and substitution. For example,
72-
given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
73-
is 3 for A will be transformed into B at least after two substitutions and one
69+
Edit distance, also called Levenshtein distance, measures how dissimilar two strings
70+
are by counting the minimum number of operations to transform one string into another.
71+
Here the operations include insertion, deletion, and substitution. For example,
72+
given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
73+
is 3, because transforming A into B takes at least two substitutions and one
7474
insertion:
75-
75+
7676
"kitten" -> "sitten" -> "sittin" -> "sitting"
7777
78-
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
79-
number denoted by `batch_size`, and the separation is specified by the LoD information.
80-
And the `batch_size` reference strings are arranged in order in the same way in the
78+
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
79+
number denoted by `batch_size`, and the separation is specified by the LoD information.
80+
And the `batch_size` reference strings are arranged in order in the same way in the
8181
LoDTensor Input(Refs).
8282
83-
Output(Out) contains the `batch_size` results and each stands for the edit stance
84-
for a pair of strings respectively. If Attr(normalized) is true, the edit distance
83+
Output(Out) contains the `batch_size` results and each stands for the edit distance
84+
for a pair of strings respectively. If Attr(normalized) is true, the edit distance
8585
will be divided by the length of reference string.
8686
)DOC");
8787
}

paddle/operators/edit_distance_op.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ __global__ void FillFirstColumn(T* dist, const int M, const int N) {
3939
}
4040

4141
template <typename T>
42-
__global__ void Levenshtein(T* dist, const int* x1, const int* x2, const int M,
43-
const int N, const int start) {
42+
__global__ void Levenshtein(T* dist, const int64_t* x1, const int64_t* x2,
43+
const int M, const int N, const int start) {
4444
int idx = blockDim.x * blockIdx.x + threadIdx.x;
4545
int offset = N;
4646
int index = start + idx * offset;
@@ -113,8 +113,8 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
113113
dist_t.Resize({m + 1, n + 1});
114114
dist_t.mutable_data<T>(ctx.GetPlace());
115115
auto dist = dist_t.data<T>();
116-
auto x1 = x1_t->data<int>() + hyp_lod[num];
117-
auto x2 = x2_t->data<int>() + ref_lod[num];
116+
auto x1 = x1_t->data<int64_t>() + hyp_lod[num];
117+
auto x2 = x2_t->data<int64_t>() + ref_lod[num];
118118

119119
FillFirstColumn<T><<<1 + m / PADDLE_CUDA_NUM_THREADS,
120120
PADDLE_CUDA_NUM_THREADS, 0, stream>>>(dist, m, n);

paddle/operators/edit_distance_op.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ class EditDistanceKernel : public framework::OpKernel<T> {
6060
dist_t.Resize({m + 1, n + 1});
6161
dist_t.mutable_data<T>(ctx.GetPlace());
6262
auto dist = dist_t.data<T>();
63-
auto x1 = x1_t->data<int>() + hyp_lod[num];
64-
auto x2 = x2_t->data<int>() + ref_lod[num];
63+
auto x1 = x1_t->data<int64_t>() + hyp_lod[num];
64+
auto x2 = x2_t->data<int64_t>() + ref_lod[num];
6565
for (int64_t i = 0; i < m + 1; ++i) {
6666
dist[i * (n + 1)] = i;
6767
}

python/paddle/v2/fluid/tests/test_edit_distance_op.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ class TestEditDistanceOp(OpTest):
5151
def setUp(self):
5252
self.op_type = "edit_distance"
5353
normalized = False
54-
x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int32")
55-
x2 = np.array([[0, 12, 4, 7, 8]]).astype("int32")
54+
x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64")
55+
x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64")
5656
x1 = np.transpose(x1)
5757
x2 = np.transpose(x2)
5858
x1_lod = [0, 1, 5]
@@ -79,8 +79,8 @@ class TestEditDistanceOpNormalized(OpTest):
7979
def setUp(self):
8080
self.op_type = "edit_distance"
8181
normalized = True
82-
x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int32")
83-
x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int32")
82+
x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64")
83+
x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64")
8484
x1 = np.transpose(x1)
8585
x2 = np.transpose(x2)
8686
x1_lod = [0, 1, 3, 6]

0 commit comments

Comments
 (0)