Skip to content

Commit 7f00716

Browse files
authored
Add context wait in type_transform (#8850)
1 parent 6f50dee commit 7f00716

File tree

3 files changed: 33 additions and 25 deletions.

3 files changed

+33
-25
lines changed

paddle/fluid/framework/data_type_transform.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ struct CastDataType {
5353
auto* context = static_cast<const platform::CUDADeviceContext*>(ctx_);
5454
trans(*context, in_begin, in_end, out_begin,
5555
CastDataTypeFunctor<InType, OutType>());
56+
context->Wait();
5657
#endif
5758
} else {
5859
PADDLE_THROW("Unsupported place!");

paddle/fluid/framework/data_type_transform_test.cc

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,13 @@ TEST(DataTypeTransform, CPUTransform) {
5050
TransDataType(kernel_fp32, kernel_fp64, in, &out);
5151
double* out_data_double = out.data<double>();
5252
for (int i = 0; i < data_number; ++i) {
53-
ASSERT_EQ(out_data_double[i], static_cast<double>(i / 3));
53+
EXPECT_EQ(out_data_double[i], static_cast<double>(i / 3));
5454
}
5555

5656
TransDataType(kernel_fp32, kernel_int32, in, &out);
5757
int* out_data_int = out.data<int>();
5858
for (int i = 0; i < data_number; ++i) {
59-
ASSERT_EQ(out_data_int[i], static_cast<int>(i / 3));
59+
EXPECT_EQ(out_data_int[i], static_cast<int>(i / 3));
6060
}
6161
}
6262

@@ -76,31 +76,31 @@ TEST(DataTypeTransform, CPUTransform) {
7676
TransDataType(kernel_fp16, kernel_fp32, in, &out);
7777
float* out_data_float = out.data<float>();
7878
for (int i = 0; i < data_number; ++i) {
79-
ASSERT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
79+
EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
8080
}
8181

8282
TransDataType(kernel_fp16, kernel_fp64, in, &out);
8383
double* out_data_double = out.data<double>();
8484
for (int i = 0; i < data_number; ++i) {
85-
ASSERT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
85+
EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
8686
}
8787

8888
TransDataType(kernel_fp16, kernel_int32, in, &out);
8989
int* out_data_int = out.data<int>();
9090
for (int i = 0; i < data_number; ++i) {
91-
ASSERT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
91+
EXPECT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
9292
}
9393

9494
TransDataType(kernel_fp16, kernel_int64, in, &out);
9595
int64_t* out_data_int64 = out.data<int64_t>();
9696
for (int i = 0; i < data_number; ++i) {
97-
ASSERT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
97+
EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
9898
}
9999

100100
TransDataType(kernel_fp16, kernel_bool, in, &out);
101101
bool* out_data_bool = out.data<bool>();
102102
for (int i = 0; i < data_number; ++i) {
103-
ASSERT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
103+
EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
104104
}
105105

106106
// transform float to float16
@@ -112,7 +112,7 @@ TEST(DataTypeTransform, CPUTransform) {
112112
TransDataType(kernel_fp32, kernel_fp16, in, &out);
113113
ptr = out.data<float16>();
114114
for (int i = 0; i < data_number; ++i) {
115-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
115+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
116116
}
117117

118118
// transform double to float16
@@ -124,7 +124,7 @@ TEST(DataTypeTransform, CPUTransform) {
124124
TransDataType(kernel_fp64, kernel_fp16, in, &out);
125125
ptr = out.data<float16>();
126126
for (int i = 0; i < data_number; ++i) {
127-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
127+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
128128
}
129129

130130
// transform int to float16
@@ -136,7 +136,7 @@ TEST(DataTypeTransform, CPUTransform) {
136136
TransDataType(kernel_int32, kernel_fp16, in, &out);
137137
ptr = out.data<float16>();
138138
for (int i = 0; i < data_number; ++i) {
139-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
139+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
140140
}
141141

142142
// transform int64 to float16
@@ -148,7 +148,7 @@ TEST(DataTypeTransform, CPUTransform) {
148148
TransDataType(kernel_int64, kernel_fp16, in, &out);
149149
ptr = out.data<float16>();
150150
for (int i = 0; i < data_number; ++i) {
151-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
151+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
152152
}
153153

154154
// transform bool to float16
@@ -160,7 +160,7 @@ TEST(DataTypeTransform, CPUTransform) {
160160
TransDataType(kernel_bool, kernel_fp16, in, &out);
161161
ptr = out.data<float16>();
162162
for (int i = 0; i < data_number; ++i) {
163-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
163+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
164164
}
165165
}
166166
}

paddle/fluid/framework/data_type_transform_test.cu

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -49,15 +49,16 @@ TEST(DataTypeTransform, GPUTransform) {
4949
float arr[6] = {0, 1, 2, 3, 4, 5};
5050
int data_number = sizeof(arr) / sizeof(arr[0]);
5151
memcpy(in_ptr, arr, sizeof(arr));
52-
TensorCopy(in, gpu_place, context, &in_gpu);
5352

53+
TensorCopy(in, gpu_place, context, &in_gpu);
54+
context.Wait();
5455
TransDataType(kernel_fp32, kernel_fp64, in_gpu, &out_gpu);
5556
TensorCopy(out_gpu, cpu_place, context, &out);
5657
context.Wait();
5758

5859
double* out_data_double = out.data<double>();
5960
for (int i = 0; i < data_number; ++i) {
60-
ASSERT_EQ(out_data_double[i], static_cast<double>(arr[i]));
61+
EXPECT_EQ(out_data_double[i], static_cast<double>(arr[i]));
6162
}
6263

6364
TransDataType(kernel_fp32, kernel_int32, in_gpu, &out_gpu);
@@ -66,7 +67,7 @@ TEST(DataTypeTransform, GPUTransform) {
6667

6768
int* out_data_int = out.data<int>();
6869
for (int i = 0; i < data_number; ++i) {
69-
ASSERT_EQ(out_data_int[i], static_cast<int>(arr[i]));
70+
EXPECT_EQ(out_data_int[i], static_cast<int>(arr[i]));
7071
}
7172
}
7273

@@ -83,6 +84,7 @@ TEST(DataTypeTransform, GPUTransform) {
8384
int data_number = sizeof(arr) / sizeof(arr[0]);
8485
memcpy(ptr, arr, sizeof(arr));
8586
TensorCopy(in, gpu_place, context, &in_gpu);
87+
context.Wait();
8688

8789
// transform from float16 to other data types
8890
TransDataType(kernel_fp16, kernel_fp32, in_gpu, &out_gpu);
@@ -91,7 +93,7 @@ TEST(DataTypeTransform, GPUTransform) {
9193

9294
float* out_data_float = out.data<float>();
9395
for (int i = 0; i < data_number; ++i) {
94-
ASSERT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
96+
EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
9597
}
9698

9799
TransDataType(kernel_fp16, kernel_fp64, in_gpu, &out_gpu);
@@ -100,7 +102,7 @@ TEST(DataTypeTransform, GPUTransform) {
100102

101103
double* out_data_double = out.data<double>();
102104
for (int i = 0; i < data_number; ++i) {
103-
ASSERT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
105+
EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
104106
}
105107

106108
TransDataType(kernel_fp16, kernel_int32, in_gpu, &out_gpu);
@@ -109,7 +111,7 @@ TEST(DataTypeTransform, GPUTransform) {
109111

110112
int* out_data_int = out.data<int>();
111113
for (int i = 0; i < data_number; ++i) {
112-
ASSERT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
114+
EXPECT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
113115
}
114116

115117
TransDataType(kernel_fp16, kernel_int64, in_gpu, &out_gpu);
@@ -118,7 +120,7 @@ TEST(DataTypeTransform, GPUTransform) {
118120

119121
int64_t* out_data_int64 = out.data<int64_t>();
120122
for (int i = 0; i < data_number; ++i) {
121-
ASSERT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
123+
EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
122124
}
123125

124126
TransDataType(kernel_fp16, kernel_bool, in_gpu, &out_gpu);
@@ -127,7 +129,7 @@ TEST(DataTypeTransform, GPUTransform) {
127129

128130
bool* out_data_bool = out.data<bool>();
129131
for (int i = 0; i < data_number; ++i) {
130-
ASSERT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
132+
EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
131133
}
132134

133135
// transform float to float16
@@ -137,13 +139,14 @@ TEST(DataTypeTransform, GPUTransform) {
137139
}
138140

139141
TensorCopy(in, gpu_place, context, &in_gpu);
142+
context.Wait();
140143
TransDataType(kernel_fp32, kernel_fp16, in_gpu, &out_gpu);
141144
TensorCopy(out_gpu, cpu_place, context, &out);
142145
context.Wait();
143146

144147
ptr = out.data<float16>();
145148
for (int i = 0; i < data_number; ++i) {
146-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
149+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_float[i]).x);
147150
}
148151

149152
// transform double to float16
@@ -154,13 +157,14 @@ TEST(DataTypeTransform, GPUTransform) {
154157
}
155158

156159
TensorCopy(in, gpu_place, context, &in_gpu);
160+
context.Wait();
157161
TransDataType(kernel_fp64, kernel_fp16, in_gpu, &out_gpu);
158162
TensorCopy(out_gpu, cpu_place, context, &out);
159163
context.Wait();
160164

161165
ptr = out.data<float16>();
162166
for (int i = 0; i < data_number; ++i) {
163-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
167+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_double[i]).x);
164168
}
165169

166170
// transform int to float16
@@ -170,13 +174,14 @@ TEST(DataTypeTransform, GPUTransform) {
170174
}
171175

172176
TensorCopy(in, gpu_place, context, &in_gpu);
177+
context.Wait();
173178
TransDataType(kernel_int32, kernel_fp16, in_gpu, &out_gpu);
174179
TensorCopy(out_gpu, cpu_place, context, &out);
175180
context.Wait();
176181

177182
ptr = out.data<float16>();
178183
for (int i = 0; i < data_number; ++i) {
179-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
184+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int[i]).x);
180185
}
181186

182187
// transform int64 to float16
@@ -187,13 +192,14 @@ TEST(DataTypeTransform, GPUTransform) {
187192
}
188193

189194
TensorCopy(in, gpu_place, context, &in_gpu);
195+
context.Wait();
190196
TransDataType(kernel_int64, kernel_fp16, in_gpu, &out_gpu);
191197
TensorCopy(out_gpu, cpu_place, context, &out);
192198
context.Wait();
193199

194200
ptr = out.data<float16>();
195201
for (int i = 0; i < data_number; ++i) {
196-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
202+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_int64[i]).x);
197203
}
198204

199205
// transform bool to float16
@@ -203,13 +209,14 @@ TEST(DataTypeTransform, GPUTransform) {
203209
}
204210

205211
TensorCopy(in, gpu_place, context, &in_gpu);
212+
context.Wait();
206213
TransDataType(kernel_bool, kernel_fp16, in_gpu, &out_gpu);
207214
TensorCopy(out_gpu, cpu_place, context, &out);
208215
context.Wait();
209216

210217
ptr = out.data<float16>();
211218
for (int i = 0; i < data_number; ++i) {
212-
ASSERT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
219+
EXPECT_EQ(ptr[i].x, static_cast<float16>(in_data_bool[i]).x);
213220
}
214221
}
215222
}

0 commit comments

Comments
 (0)