Merge pull request #2512 from andrewtbarker/matmult-verification

jimmytwei · web-flow · commit e195611e2136 · 2024-10-10T17:05:40.000-07:00
Add basic verification to oneMKL matmul sample
diff --git a/Libraries/oneMKL/matrix_mul_mkl/matrix_mul_mkl.cpp b/Libraries/oneMKL/matrix_mul_mkl/matrix_mul_mkl.cpp
@@ -37,13 +37,8 @@ void test(queue &Q, int M, int N, int K)
     auto B = malloc_device<T>(ldb * N, Q);
     auto C = malloc_device<T>(ldc * N, Q);
 
-    /* Fill A/B with random data */
     constexpr int rd_size = 1048576;
-    auto random_data = malloc_host<T>(rd_size, Q);
-    generate_random_data(rd_size, random_data);
-
-    replicate_data(Q, A, lda * K, random_data, rd_size);
-    replicate_data(Q, B, ldb * N, random_data, rd_size);
+    auto host_data = malloc_host<T>(rd_size, Q);
 
     /* Measure time for a given number of GEMM calls */
     auto time_gemms = [=, &Q](int runs) -> double {
@@ -57,7 +52,36 @@ void test(queue &Q, int M, int N, int K)
         return duration<double>(end - start).count();
     };
 
-    /* Do a warmup call to initialize MKL and ensure kernels are JIT'ed if needed */
+    /* Fill A/B with all ones to verify correctness */
+    generate_ones(rd_size, host_data);
+    replicate_data(Q, A, lda * K, host_data, rd_size);
+    replicate_data(Q, B, ldb * N, host_data, rd_size);
+
+    /* Verify that the leading entries of C are correct */
+    std::cout << " -> Verification...";
+    (void) time_gemms(1);
+    size_t elems = std::min(ldc * N, rd_size);
+    Q.copy(C, host_data, elems).wait();
+    bool ok = true;
+    int linear_id = 0;
+    for (size_t j = 0; j < N; j++) {
+        for (size_t i = 0; i < M; i++) {
+            linear_id = j*ldc + i;
+            if (linear_id >= elems) break;
+            if (host_data[linear_id] != T(K)) {
+                ok = false;
+            }
+        }
+        if (linear_id >= elems) break;
+    }
+    std::cout << (ok ? " passes." : " FAILS!") << std::endl;
+
+    /* Fill A/B with random data */
+    generate_random_data(rd_size, host_data);
+    replicate_data(Q, A, lda * K, host_data, rd_size);
+    replicate_data(Q, B, ldb * N, host_data, rd_size);
+
+    /* Do a warmup call with random data to initialize MKL and ensure kernels are JIT'ed if needed */
     std::cout << " -> Warmup...\n";
     (void) time_gemms(1);
 
@@ -93,7 +117,7 @@ void test(queue &Q, int M, int N, int K)
     free(A, Q);
     free(B, Q);
     free(C, Q);
-    free(random_data, Q);
+    free(host_data, Q);
 }
 
 void usage(const char *pname)
diff --git a/Libraries/oneMKL/matrix_mul_mkl/utilities.hpp b/Libraries/oneMKL/matrix_mul_mkl/utilities.hpp
@@ -24,6 +24,14 @@ int nice_ld(int x)
     return x;
 }
 
+template <typename T>  /* T only needs to support the conversion T(1) used below */
+void generate_ones(size_t elems, T *v)  /* Writes T(1) to v[0] .. v[elems-1]; v must have room for elems entries */
+{
+#pragma omp parallel for
+    for (size_t i = 0; i < elems; i++)  /* each iteration writes a distinct element, so the OpenMP pragma above can split them across threads */
+        v[i] = T(1);
+}
+
 /* Random number generation helpers */
 template <typename T>
 void generate_random_data(size_t elems, T *v)