Skip to content

Commit 14d4ef6

Browse files
committed
Address Yury's comments
1 parent 929ca93 commit 14d4ef6

File tree

5 files changed

+46
-51
lines changed

5 files changed

+46
-51
lines changed

sycl/test-e2e/Matrix/SG32/joint_matrix_activation.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
//===----------------------------------------------------------------------===//
88
// REQUIRES: aspect-ext_intel_matrix
99

10-
// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
1110
// UNSUPPORTED: gpu-intel-dg2
11+
// UNSUPPORTED-INTENDED: SG size = 32 is not currently supported for SYCL Joint
12+
// Matrix by IGC on DG2
1213

1314
// RUN: %{build} %fp-model-precise -o %t.out
1415
// RUN: %{run} %t.out

sycl/test-e2e/Matrix/common.hpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@ void matrix_copy(unsigned int rows, unsigned int cols, T *src, T *dst) {
162162
}
163163
}
164164

165+
// Transforms a row-major rows x cols matrix in place: every element is
// replaced by the value `op` returns for it.
//   rows, cols - matrix dimensions
//   mat        - pointer to the first of rows * cols contiguous elements
//   op         - callable invoked once per element with the current value
template <typename F, typename T>
void matrix_apply(unsigned int rows, unsigned int cols, T *mat, F op) {
  for (unsigned int r = 0; r < rows; ++r) {
    for (unsigned int c = 0; c < cols; ++c) {
      T &cell = mat[r * cols + c];
      cell = op(cell);
    }
  }
}
171+
165172
template <Activation act = Activation::None, typename T1, typename T2,
166173
bool exact = false>
167174
bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) {
@@ -171,17 +178,7 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) {
171178
std::is_same_v<T1, bfloat16> ||
172179
(std::is_same_v<T1, double> &&
173180
std::is_same_v<T2, double>))) {
174-
float diff = 0;
175-
if constexpr (act == Activation::None)
176-
diff = std::fabs(src[i * cols + j] - (T1)ref[i * cols + j]);
177-
else if constexpr (act == Activation::ReLU)
178-
diff =
179-
std::fabs(src[i * cols + j] -
180-
(T1)(sycl::max(static_cast<T2>(0), ref[i * cols + j])));
181-
else if constexpr (act == Activation::Sigmoid)
182-
diff = std::fabs(src[i * cols + j] -
183-
(T1)(1.0f / (1.0f + sycl::exp(-ref[i * cols + j]))));
184-
181+
float diff = std::fabs(src[i * cols + j] - (T1)ref[i * cols + j]);
185182
if (diff > FLOAT_EPSILON || std::isnan(src[i * cols + j])) {
186183
std::cout << "Incorrect result in matrix. "
187184
<< "i: " << i << ", j: " << j
@@ -190,7 +187,8 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) {
190187
<< ", Epsilon: " << FLOAT_EPSILON << "\n";
191188
return false;
192189
}
193-
} else if constexpr (exact || std::is_same_v<T1, int32_t>) {
190+
} else if constexpr (exact || std::is_same_v<T1, int32_t> ||
191+
std::is_same_v<T1, int8_t>) {
194192
if (src[i * cols + j] != ref[i * cols + j]) {
195193
std::cout << "Incorrect result in matrix."
196194
<< "i: " << i << ", j: " << j

sycl/test-e2e/Matrix/joint_matrix_activation.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88
// REQUIRES: aspect-ext_intel_matrix
9-
// UNSUPPORTED: gpu-intel-dg2
109

1110
// RUN: %{build} -o %t.out
1211
// RUN: %{run} %t.out

sycl/test-e2e/Matrix/joint_matrix_activation_impl.hpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ constexpr size_t TM = 8;
1515
constexpr size_t TN = 16;
1616
constexpr size_t TK = 16;
1717

18+
// Rectified linear unit: returns sycl::max of zero (converted to T) and x.
template <typename T> T ReLU(T x) {
  const T zero = static_cast<T>(0);
  return sycl::max(zero, x);
}
19+
20+
template <typename T> T Sigmoid(T x) {
21+
return x = 1.0f / (1.0f + sycl::exp(-x));
22+
}
23+
1824
template <Activation act, size_t TM, size_t TK, typename Group, typename Tsrc,
1925
typename Tdest, use UseSrc, use UseDest, layout LayoutSrc,
2026
layout LayoutDest>
@@ -25,13 +31,11 @@ void applyActivation(
2531
joint_matrix_copy(sg, sub_c, sub_a);
2632
} else if constexpr (act == Activation::ReLU) {
2733

28-
joint_matrix_apply(
29-
sg, sub_c, [=](float &x) { x = sycl::max(static_cast<float>(0), x); });
34+
joint_matrix_apply(sg, sub_c, [=](float &x) { x = ReLU(x); });
3035
joint_matrix_copy(sg, sub_c, sub_a);
3136

3237
} else if constexpr (act == Activation::Sigmoid) {
33-
joint_matrix_apply(sg, sub_c,
34-
[=](float &x) { x = 1.0f / (1.0f + sycl::exp(-x)); });
38+
joint_matrix_apply(sg, sub_c, [=](float &x) { x = Sigmoid(x); });
3539
joint_matrix_copy(sg, sub_c, sub_a);
3640
}
3741
return;
@@ -107,13 +111,14 @@ int main() {
107111
std::cout << (res ? "Copy passed" : "Copy failed") << std::endl;
108112

109113
matrix_activation_copy<Activation::ReLU>(MC, MA);
110-
res &= matrix_compare<Activation::ReLU>(MATRIX_M, MATRIX_N, (bfloat16 *)A,
111-
(float *)C);
114+
matrix_apply(MATRIX_M, MATRIX_N, (float *)C, [](float x) { return ReLU(x); });
115+
res &= matrix_compare(MATRIX_M, MATRIX_N, (bfloat16 *)A, (float *)C);
112116
std::cout << (res ? "ReLU passed" : "ReLU failed") << std::endl;
113117

114118
matrix_activation_copy<Activation::Sigmoid>(MC, MA);
115-
res &= matrix_compare<Activation::Sigmoid>(MATRIX_M, MATRIX_N, (bfloat16 *)A,
116-
(float *)C);
119+
matrix_apply(MATRIX_M, MATRIX_N, (float *)C,
120+
[](float x) { return Sigmoid(x); });
121+
res &= matrix_compare(MATRIX_M, MATRIX_N, (bfloat16 *)A, (float *)C);
117122
std::cout << (res ? "Sigmoid passed" : "Sigmoid failed") << std::endl;
118123

119124
return !res;

sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices_impl.hpp

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,22 @@
77
//===----------------------------------------------------------------------===//
88
#include <sycl/usm.hpp>
99

10-
template <typename Tc, typename Ta, size_t M, size_t N>
11-
bool apply_verify(Tc *C, Tc *D, Ta *A, Ta *Ar, Tc *Cref, Ta *Aref) {
12-
for (size_t i = 0; i < M; i++)
13-
for (size_t j = 0; j < N; j++) {
14-
Tc diffd = D[i * N + j] - Cref[i * N + j] * 2;
15-
Tc diffc = C[i * N + j] - sycl::max(static_cast<Tc>(0), Cref[i * N + j]);
16-
Ta diffar = Ar[i * N + j] - (Aref[i * N + j] + 42);
17-
Ta diffa = A[i * N + j] - (Aref[i * N + j] + 5);
18-
if constexpr (std::is_same_v<Ta, bfloat16>) {
19-
if (std::fabs(diffd) > FLOAT_EPSILON ||
20-
std::fabs(diffc) > FLOAT_EPSILON ||
21-
std::fabs(diffar) > FLOAT_EPSILON ||
22-
std::fabs(diffa) > FLOAT_EPSILON || std::isnan(C[i * N + j]) ||
23-
std::isnan(A[i * N + j])) {
24-
return false;
25-
}
26-
} else {
27-
if (std::abs(diffd) > 0 || std::abs(diffc) > 0 ||
28-
std::abs(diffar) > 0 || std::abs(diffa) > 0) {
29-
return false;
30-
}
31-
}
32-
}
33-
return true;
10+
// Returns twice the argument, converted back to T.
template <typename T> T mul2(T x) {
  T doubled = x * 2;
  return doubled;
}
11+
12+
// Returns the argument increased by five, converted back to T.
template <typename T> T add5(T x) {
  T shifted = x + 5;
  return shifted;
}
13+
14+
// Host-side check for the two outputs of a two-matrix apply over an M x N
// matrix.
//   C   - passed to matrix_compare against add5 applied to the reference
//   D   - passed to matrix_compare against mul2 applied to the reference
//   ref - reference values; NOTE: overwritten in place with mul2(ref)
// Returns true only when both matrix_compare calls return true. Returns false
// if the scratch copy of the reference cannot be allocated.
template <typename Tc, size_t M, size_t N>
bool apply_verify(Tc *C, Tc *D, Tc *ref) {
  // `ref` is transformed in place below, so keep a pristine copy for the
  // second (add5) check.
  Tc *refcopy = static_cast<Tc *>(std::malloc(M * N * sizeof(Tc)));
  if (refcopy == nullptr)
    return false;
  memcpy(refcopy, ref, M * N * sizeof(Tc));

  matrix_apply(M, N, ref, [](Tc x) { return mul2(x); });
  bool res = matrix_compare(M, N, D, ref);

  matrix_apply(M, N, refcopy, [](Tc x) { return add5(x); });
  res &= matrix_compare(M, N, C, refcopy);

  // Release the scratch buffer; without this every call leaks M * N elements.
  std::free(refcopy);
  return res;
}
25+
3526
template <typename Tc, typename Ta, size_t TM, size_t TN, size_t TK, size_t M,
3627
size_t N, size_t K, class kernel_name>
3728
bool apply_two_matrices(Tc *C, Tc *D, Ta *A, Ta *Ar, Tc *Cref, Ta *Aref,
@@ -77,8 +68,8 @@ bool apply_two_matrices(Tc *C, Tc *D, Ta *A, Ta *Ar, Tc *Cref, Ta *Aref,
7768
sg, sub_c, pC + (sg_startx * TM) * N + sg_starty / sg_size * TN,
7869
N, layout::row_major);
7970
joint_matrix_apply(sg, sub_c, sub_d, [](Tc &x, Tc &y) {
80-
y = x * 2;
81-
x = sycl::max(static_cast<Tc>(0), x);
71+
y = mul2(x);
72+
x = add5(x);
8273
});
8374
joint_matrix_store(
8475
sg, sub_d, pD + (sg_startx * TM) * N + sg_starty / sg_size * TN,
@@ -90,8 +81,8 @@ bool apply_two_matrices(Tc *C, Tc *D, Ta *A, Ta *Ar, Tc *Cref, Ta *Aref,
9081
sg, sub_a, pA + (sg_startx * TM) * K + sg_starty / sg_size * TK,
9182
K);
9283
joint_matrix_apply(sg, sub_a, sub_ar, [](Ta &x, Ta &y) {
93-
y = x + 42;
94-
x += 5;
84+
y = mul2(x);
85+
x = add5(x);
9586
});
9687
ext::intel::experimental::matrix::joint_matrix_store(
9788
sg, sub_ar,
@@ -101,7 +92,8 @@ bool apply_two_matrices(Tc *C, Tc *D, Ta *A, Ta *Ar, Tc *Cref, Ta *Aref,
10192
K);
10293
}); // parallel for
10394
}).wait();
104-
return apply_verify<Tc, Ta, M, N>(C, D, A, Ar, Cref, Aref);
95+
return apply_verify<Tc, M, N>(C, D, Cref) &&
96+
apply_verify<Ta, M, N>(A, Ar, Aref);
10597
}
10698

10799
template <typename Ta, typename Tc, size_t TM, size_t TN, size_t TK,

0 commit comments

Comments
 (0)