forked from Berkeley-CS267/HW1
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdgemm-blocked.c
More file actions
49 lines (45 loc) · 1.64 KB
/
dgemm-blocked.c
File metadata and controls
49 lines (45 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
const char* dgemm_desc = "Simple blocked dgemm.";
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 41
#endif
#define min(a, b) (((a) < (b)) ? (a) : (b))
/*
* This auxiliary subroutine performs a smaller dgemm operation
* C := C + A * B
* where C is M-by-N, A is M-by-K, and B is K-by-N.
*/
static void do_block(int lda, int M, int N, int K, double* A, double* B, double* C) {
// For each row i of A
for (int i = 0; i < M; ++i) {
// For each column j of B
for (int j = 0; j < N; ++j) {
// Compute C(i,j)
double cij = C[i + j * lda];
for (int k = 0; k < K; ++k) {
cij += A[i + k * lda] * B[k + j * lda];
}
C[i + j * lda] = cij;
}
}
}
/* This routine performs a dgemm operation
* C := C + A * B
* where A, B, and C are lda-by-lda matrices stored in column-major format.
* On exit, A and B maintain their input values. */
void square_dgemm(int lda, double* A, double* B, double* C) {
// For each block-row of A
for (int i = 0; i < lda; i += BLOCK_SIZE) {
// For each block-column of B
for (int j = 0; j < lda; j += BLOCK_SIZE) {
// Accumulate block dgemms into block of C
for (int k = 0; k < lda; k += BLOCK_SIZE) {
// Correct block dimensions if block "goes off edge of" the matrix
int M = min(BLOCK_SIZE, lda - i);
int N = min(BLOCK_SIZE, lda - j);
int K = min(BLOCK_SIZE, lda - k);
// Perform individual block dgemm
do_block(lda, M, N, K, A + i + k * lda, B + k + j * lda, C + i + j * lda);
}
}
}
}