-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmatrix-multiplication.cpp
More file actions
129 lines (119 loc) · 4.49 KB
/
matrix-multiplication.cpp
File metadata and controls
129 lines (119 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <ctime>
#include <vector>
#include <chrono>
#include <thread>
constexpr size_t NUM_ROWS_A = 1000;
constexpr size_t NUM_COLS_A = 1000;
constexpr size_t NUM_ROWS_B = NUM_COLS_A;
constexpr size_t NUM_COLS_B = 1000;
void sequentialMatrixMultiply(std::vector<std::vector<int>> &A, std::vector<std::vector<int>> &B, std::vector<std::vector<int>> &C)
{
for(size_t i = 0; i < NUM_ROWS_A; i++)
{
for(size_t j = 0; j < NUM_COLS_B; j++)
{
C[i][j] = 0;
for(size_t k = 0; k < NUM_COLS_A; k++)
{
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
void calculateResultByChunks(std::vector<std::vector<int>> &A, std::vector<std::vector<int>> &B, std::vector<std::vector<int>> &C,
size_t startRow, size_t endRow)
{
for(size_t i = startRow; i < endRow; i++)
{
for(size_t j = 0; j < NUM_COLS_B; j++)
{
int res = 0;
for(size_t k = 0; k < NUM_COLS_A; ++k)
{
res += A[i][k] * B[k][j];
}
C[i][j] = res;
}
}
}
void parallelMatrixMultiply(std::vector<std::vector<int>> &A, std::vector<std::vector<int>> &B, std::vector<std::vector<int>> &C)
{
unsigned int numThreads = std::thread::hardware_concurrency();
std::vector<std::thread> threads;
unsigned int rowsPerThread = NUM_ROWS_A / numThreads;
unsigned int numExtraRows = NUM_ROWS_A % numThreads;
size_t startRow = 0;
for(unsigned int i = 0; i < numThreads; i++)
{
size_t endRow = startRow + rowsPerThread + (i < numExtraRows ? 1 : 0);
threads.push_back(std::thread(calculateResultByChunks, std::ref(A), std::ref(B), std::ref(C), startRow, endRow));
startRow = endRow;
}
for(auto &thread : threads)
{
thread.join();
}
}
int main()
{
srand(time(NULL));
const unsigned int NUM_EVAL_RUNS = 3;
std::vector<std::vector<int>> A(NUM_ROWS_A, std::vector<int>(NUM_COLS_A));
std::vector<std::vector<int>> B(NUM_ROWS_B, std::vector<int>(NUM_COLS_B));
std::vector<std::vector<int>> sequentialResult(NUM_ROWS_A, std::vector<int>(NUM_COLS_B));
std::vector<std::vector<int>> parallelResult(NUM_ROWS_A, std::vector<int>(NUM_COLS_B));
for(size_t i = 0; i < NUM_ROWS_A; i++)
{
for(size_t j = 0; j < NUM_COLS_A; j++)
{
A[i][j] = rand() % 100 + 1;
}
}
for(size_t i = 0; i < NUM_ROWS_B; i++)
{
for(size_t j = 0; j < NUM_COLS_B; j++)
{
B[i][j] = rand() % 100 + 1;
}
}
std::cout << "Evaluating Sequential Implementation..." << std::endl;
std::chrono::duration<double, std::milli> sequentialDuration(0);
double averageSequentialDuration = 0.0;
sequentialMatrixMultiply(A, B, sequentialResult);
for(unsigned int i = 0; i < NUM_EVAL_RUNS; i++)
{
auto startTime = std::chrono::high_resolution_clock::now();
sequentialMatrixMultiply(A, B, sequentialResult);
sequentialDuration += std::chrono::high_resolution_clock::now() - startTime;
}
averageSequentialDuration = sequentialDuration.count() / NUM_EVAL_RUNS;
std::cout << "Evaluating Parallel Implementation..." << std::endl;
std::chrono::duration<double, std::milli> parallelDuration(0);
double averageParallelDuration = 0.0;
parallelMatrixMultiply(A, B, parallelResult);
for(unsigned i = 0; i < NUM_EVAL_RUNS; i++)
{
auto startTime = std::chrono::high_resolution_clock::now();
parallelMatrixMultiply(A, B, parallelResult);
parallelDuration += std::chrono::high_resolution_clock::now() - startTime;
}
averageParallelDuration = parallelDuration.count() / NUM_EVAL_RUNS;
for(size_t i = 0; i < NUM_ROWS_A; i++)
{
for(size_t j = 0; j < NUM_COLS_B; j++)
{
if(sequentialResult[i][j] != parallelResult[i][j])
{
std::cerr << "ERROR: Result mismatch betweem sequential and parallel executions!" << std::endl;
return -1;
}
}
}
std::cout << "Average Sequential Execution Duration: " << averageSequentialDuration << " ms" << std::endl;
std::cout << "Average Parallel Execution Duration: " << averageParallelDuration << " ms" << std::endl;
std::cout << "Parallel execution is " << std::fixed << std::setprecision(2) << averageSequentialDuration / averageParallelDuration << " times faster than sequential execution!" << std::endl;
return 0;
}