Skip to content

Commit 1b10421

Browse files
committed
Prepare matmul global func for test
1 parent 74159b3 commit 1b10421

File tree

3 files changed

+43
-34
lines changed

3 files changed

+43
-34
lines changed

main.cu

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#include <cuda_runtime.h>
2+
#include <cstdio>
3+
4+
#include "matmulf8.cuh"
5+
#include "load_core.cuh"
6+
7+
8+
// Benchmark driver for matmul().
//
// Allocates page-locked host buffers for A, B and C, loads the two "core"
// images through load_core(), zero-fills A and B, runs matmul() once and
// prints the time it returns together with the derived throughput.
//
// Returns 0 on success and 1 if CUDA setup, a host allocation, or loading a
// core image fails.
int main() {
    int n = 4096, m = 4096, p = 4096;

    // Widen to size_t before multiplying so larger dimensions cannot overflow
    // int. Each buffer is rows * cols * sizeof(int) / 4 bytes, i.e. one byte
    // per (row, col) pair when int is 4 bytes, addressed as rows * cols / 4 ints.
    const size_t wordsA = (size_t)n * m / 4;
    const size_t wordsB = (size_t)m * p / 4;
    const size_t bytesA = (size_t)n * m * sizeof(int) / 4;
    const size_t bytesB = (size_t)m * p * sizeof(int) / 4;
    const size_t bytesC = (size_t)n * p * sizeof(int) / 4;

    int *A = NULL, *B = NULL, *C = NULL;

    // Stop at the first failing runtime call instead of carrying on with
    // unallocated pointers. cudaFree(0) is kept from the original; it is
    // presumably there to force context creation before the allocations.
    cudaError_t err = cudaSetDevice(0);
    if (err == cudaSuccess) err = cudaFree(0);
    if (err == cudaSuccess) err = cudaMallocHost(&A, bytesA);
    if (err == cudaSuccess) err = cudaMallocHost(&B, bytesB);
    if (err == cudaSuccess) err = cudaMallocHost(&C, bytesC);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA setup failed: %s\n", cudaGetErrorString(err));
        if (A) cudaFreeHost(A);
        if (B) cudaFreeHost(B);
        if (C) cudaFreeHost(C);
        return 1;
    }

    // DB selects which add-core image is loaded.
#ifdef DB
    int* acore = load_core("addcore.bin");
#else
    int* acore = load_core("apdcore.bin");
#endif
    int* mcore = load_core("mltcore.bin");
    // NOTE(review): assumes load_core() reports failure with a null pointer;
    // confirm against load_core.cuh.
    if (!acore || !mcore) {
        fprintf(stderr, "Failed to load core images\n");
        if (acore) cudaFreeHost(acore);
        if (mcore) cudaFreeHost(mcore);
        cudaFreeHost(A); cudaFreeHost(B); cudaFreeHost(C);
        return 1;
    }

    // Inputs are all zeros. The original stored rand() into every word and
    // then immediately overwrote it with 0, so the rand() call was dead work.
    // The disabled random variant was:
    //   A[i] = rand(); A[i] &= 0x7f7f7f7f;
    for (size_t i = 0; i < wordsA; i++) {
        A[i] = 0;
    }
    for (size_t i = 0; i < wordsB; i++) {
        B[i] = 0;
    }

    float t = matmul(A, B, C, n, m, p, acore, mcore);
    printf("Time: %f ms\n", t);

    // 2*n*m*p operations; dividing by milliseconds and then by 1e6 yields
    // units of 1e9 operations per second. The leading 2.0 keeps the product
    // in double precision.
    float flops = 2.0 * n * m * p / t / 1e6;
    printf("FLOPS: %f GFLOPS\n", flops);

    cudaFreeHost(A); cudaFreeHost(B); cudaFreeHost(C);
    // NOTE(review): assumes load_core() allocates with cudaMallocHost, as the
    // original cleanup did; confirm against load_core.cuh.
    cudaFreeHost(acore); cudaFreeHost(mcore);
    return 0;
}

matmulf8.cu

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -51,37 +51,3 @@ float matmul(int* A, int* B, int* C, int n, int m, int p, int* acore, int* mcore
5151

5252
return t;
5353
}
54-
55-
// Removed side of the diff: the benchmark driver as it was in matmulf8.cu.
// It allocates host buffers for A, B and C with cudaMallocHost, loads two
// core images with load_core(), zero-fills A and B, calls matmul() once and
// prints the returned time and a derived throughput figure.
int main() {
56-
int n = 4096, m = 4096, p = 4096;
57-
int *A, *B, *C;
58-
cudaSetDevice(0);
59-
// NOTE(review): presumably forces CUDA context creation up front - confirm.
cudaFree(0);
60-
61-
// Each buffer is rows * cols * sizeof(int) / 4 bytes, i.e. rows * cols bytes
// when int is 4 bytes. Return values of these calls are not checked.
cudaMallocHost(&A, n * m * sizeof(int) / 4);
62-
cudaMallocHost(&B, m * p * sizeof(int) / 4);
63-
cudaMallocHost(&C, n * p * sizeof(int) / 4);
64-
// DB selects which add-core image file is loaded.
#ifdef DB
65-
int* acore = load_core("addcore.bin");
66-
#else
67-
int* acore = load_core("apdcore.bin");
68-
#endif
69-
int* mcore = load_core("mltcore.bin");
70-
// Walks A as n * m / 4 ints, which spans the whole allocation above.
for(int i = 0; i < n * m / 4; i++) {
71-
// This value is overwritten by the store of 0 two statements below.
A[i] = rand();
72-
// A[i] &= 0x7f7f7f7f;
73-
A[i] = 0;
74-
}
75-
// Same pattern for B: every word ends up 0.
for(int i = 0; i < m * p / 4; i++) {
76-
B[i] = rand();
77-
// B[i] &= 0x7f7f7f7f;
78-
B[i] = 0;
79-
}
80-
float t = matmul(A, B, C, n, m, p, acore, mcore);
81-
printf("Time: %f ms\n", t);
82-
// 2*n*m*p operations divided by t (printed above as milliseconds) and 1e6,
// giving units of 1e9 operations per second; evaluated in double because of 2.0.
float flops = 2.0 * n * m * p / t / 1e6;
83-
printf("FLOPS: %f GFLOPS\n", flops);
84-
cudaFreeHost(A); cudaFreeHost(B); cudaFreeHost(C);
85-
// NOTE(review): assumes load_core() allocates with cudaMallocHost - confirm.
cudaFreeHost(acore); cudaFreeHost(mcore);
86-
return 0;
87-
}

matmulf8.cuh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#pragma once
2+
3+
// Host entry point for the matrix multiply implemented in matmulf8.cu.
//
// NOTE(review): the semantics below are taken from the caller in main.cu,
// not from the implementation - confirm against matmulf8.cu.
//   A, B, C      - host buffers; the caller allocates them with
//                  cudaMallocHost as n*m, m*p and n*p bytes respectively
//                  (for a 4-byte int), presumably A (n x m) * B (m x p)
//                  -> C (n x p) with four 8-bit elements packed per int.
//   n, m, p      - matrix dimensions as described above.
//   acore, mcore - buffers returned by load_core() for the add-core and
//                  multiply-core image files; their layout is not visible here.
// Returns a float that the caller prints as elapsed time in milliseconds.
float matmul(int* A, int* B, int* C, int n, int m, int p, int* acore, int* mcore);

0 commit comments

Comments
 (0)