@@ -36,7 +36,6 @@ void test(queue &Q, int M, int N, int K)
36
36
auto A = malloc_device<T>(lda * K, Q);
37
37
auto B = malloc_device<T>(ldb * N, Q);
38
38
auto C = malloc_device<T>(ldc * N, Q);
39
- auto flag = malloc_shared<int >(1 , Q);
40
39
41
40
constexpr int rd_size = 1048576 ;
42
41
auto host_data = malloc_host<T>(rd_size, Q);
@@ -59,36 +58,23 @@ void test(queue &Q, int M, int N, int K)
59
58
replicate_data (Q, B, ldb * N, host_data, rd_size);
60
59
61
60
/* Verify that the leading entries of C are correct */
62
- std::cout << " -> Verification...\n " ;
61
+ std::cout << " -> Verification..." ;
63
62
(void ) time_gemms (1 );
64
63
size_t elems = std::min (ldc * N, rd_size);
65
- Q.copy (C, host_data, elems);
66
- flag[ 0 ] = 0 ;
64
+ Q.copy (C, host_data, elems). wait () ;
65
+ bool ok = true ;
67
66
int linear_id = 0 ;
68
67
for (size_t j = 0 ; j < N; j++) {
69
68
for (size_t i = 0 ; i < M; i++) {
70
69
linear_id = j*ldc + i;
71
70
if (linear_id >= elems) break ;
72
71
if (host_data[linear_id] != T (K)) {
73
- flag[ 0 ] = 1 ;
72
+ ok = false ;
74
73
}
75
74
}
76
75
if (linear_id >= elems) break ;
77
76
}
78
- /*
79
- for (size_t i = 0; i < elems; i++) {
80
- int count = 0;
81
- if (host_data[i] != T(K)) {
82
- flag[0] = 1;
83
- if (count < 10) {
84
- sycl::ext::oneapi::experimental::printf("error elem %d expect %f got %f\n",
85
- i, T(K), host_data[i]);
86
- count++;
87
- }
88
- }
89
- }
90
- */
91
- std::cout << " verification " << (flag[0 ] == 0 ? " passes." : " FAILS!" ) << std::endl;
77
+ std::cout << (ok ? " passes." : " FAILS!" ) << std::endl;
92
78
93
79
/* Fill A/B with random data */
94
80
generate_random_data (rd_size, host_data);
@@ -131,7 +117,6 @@ void test(queue &Q, int M, int N, int K)
131
117
free (A, Q);
132
118
free (B, Q);
133
119
free (C, Q);
134
- free (flag, Q);
135
120
free (host_data, Q);
136
121
}
137
122
0 commit comments