
Commit 2ce5c9d

fix gpu implement
1 parent 07e87ff commit 2ce5c9d

File tree

1 file changed: +12 -10 lines changed

paddle/operators/math/sequence_padding.cu

Lines changed: 12 additions & 10 deletions
@@ -71,7 +71,8 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);

     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");

@@ -80,17 +81,17 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequence_length, num_sequences, sequence_width].");

-    size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be the "
                       "maximum length of all sequences in LoDTensor seq.");

-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be the "
                       "number of sequences in LoDTensor seq.");

-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
@@ -101,7 +102,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
       return;
     }

-    const size_t kBlockSize = 512;
+    const int64_t kBlockSize = 512;

     /* At least use 32 threads to copy sequence_width elements,
      * and at least 8 elements for each thread.
@@ -143,7 +144,8 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);

     auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
+    PADDLE_ENFORCE_EQ(seq_dims[0],
+                      static_cast<int64_t>(abs_offset_lod[level].back()),
                       "The first dimension of LoDTensor seq should be "
                       "equal to the sum of all sequences's length.");

@@ -152,17 +154,17 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
                       "The input padding should be a 3-D Tensor of shape "
                       "[max_sequnece_length, num_sequences, sequence_width].");

-    size_t max_sequence_length = MaximumSequenceLength(lod, level);
+    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
     PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                       "The first dimension of Tensor padding should be "
                       "the maximum length of all sequences in LoDTensor seq.");

-    const size_t num_sequences = abs_offset_lod[level].size() - 1;
+    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
     PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                       "The second dimension of Tensor padding should be "
                       "the number of sequences in LoDTensor seq.");

-    const size_t sequence_width = seq.numel() / seq_dims[0];
+    const int64_t sequence_width = seq.numel() / seq_dims[0];
     PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                       "The third dimension of Tensor padding should be the "
                       "width of sequence in LoDTensor seq.");
@@ -173,7 +175,7 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
       return;
     }

-    const size_t kBlockSize = 512;
+    const int64_t kBlockSize = 512;

     /* At least use 32 threads to copy sequence_width elements,
      * and at least 8 elements for each thread.
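
Every hunk applies the same fix: the extents returned by seq.dims() and padding.dims() are int64_t, while the LoD offsets (and the locals that were declared size_t) are unsigned, so each PADDLE_ENFORCE_EQ compared signed against unsigned values. Below is a minimal sketch of the mismatch under those assumptions; ENFORCE_EQ here is a hypothetical stand-in, not Paddle's actual PADDLE_ENFORCE_EQ, and the diagnostic presumes a build with -Wsign-compare (or -Werror):

// Hypothetical stand-in for PADDLE_ENFORCE_EQ: `a == b` mixes signedness
// when `a` is int64_t and `b` is size_t, which warns or errors under
// -Wsign-compare / -Werror.
#include <cassert>
#include <cstdint>
#include <vector>

#define ENFORCE_EQ(a, b) assert((a) == (b))

int main() {
  std::vector<size_t> abs_offset = {0, 4, 9};  // LoD offsets are unsigned
  int64_t first_dim = 9;                       // dims() entries are int64_t

  // ENFORCE_EQ(first_dim, abs_offset.back());  // signed/unsigned comparison
  ENFORCE_EQ(first_dim, static_cast<int64_t>(abs_offset.back()));  // clean

  // Declaring the derived quantities as int64_t up front (as the commit does)
  // keeps every later comparison against int64_t dims within one signed type.
  const int64_t num_sequences = static_cast<int64_t>(abs_offset.size()) - 1;
  ENFORCE_EQ(num_sequences, 2);
  return 0;
}

Normalizing the locals to int64_t once, rather than casting at each PADDLE_ENFORCE_EQ call site, also keeps follow-on arithmetic such as seq.numel() / seq_dims[0] in a single signed type.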
