-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHeatEquation.cu
More file actions
144 lines (110 loc) · 4.29 KB
/
HeatEquation.cu
File metadata and controls
144 lines (110 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#define N 10000 // Size of the grid
#define numSteps 1000 // Number of iteration
#define ALPHA 0.1 // Heat equation constant
#define SQUARE_SIZE 1000 // Dimension of the initial heated square
#define BLOCK_SIZE 32 // Thread block size for kernel
#define OutputNum 200 // Number of iterations to print grid
__global__ void heatEquation(float* u_old, float* u_new, const float dx2, const float dy2, const float dt) {
// Thread indexes in the grid
int i = blockIdx.x * blockDim.x + threadIdx.x + 1;
int j = blockIdx.y * blockDim.y + threadIdx.y + 1;
if (i < N - 1 && j < N - 1) {
int idx = j * N + i; // Linear index
// Heat equation
u_new[idx] = u_old[idx] + ALPHA * dt * ( (u_old[idx - 1] - 2.0 * u_old[idx] + u_old[idx + 1])/dx2 +
(u_old[idx - N] - 2.0 * u_old[idx] + u_old[idx + N])/dy2 );
}
}
void GridToFile(const char* filename, float* grid) {
FILE* file = fopen(filename, "w");
if (file == NULL) {
printf("Failed to open file '%s' for writing.\n", filename);
return;
}
for (int j = 0; j < N; j++) {
for (int i = 0; i < N; i++) {
fprintf(file, "%f ", grid[j * N + i]);
}
fprintf(file, "\n");
}
fclose(file);
}
int main() {
// Deltas
const float dx = 0.01; // Horizontal grid spacing
const float dy = 0.01; // Vertical grid spacing
const float dx2 = dx*dx;
const float dy2 = dy*dy;
const float dt = dx2 * dy2 / (2.0 * ALPHA * (dx2 + dy2)); // Largest stable time step
// Declaring the inital and final grids
float* u_old, *u_new;
float* d_u_old, *d_u_new;
u_old = (float*)malloc(N * N * sizeof(float));
u_new = (float*)malloc(N * N * sizeof(float));
// Initialize the grid
for (int j = 0; j < N; j++) {
for (int i = 0; i < N; i++) {
int idx = j * N + i;
if (i >= (N - SQUARE_SIZE) / 2 && i < (N + SQUARE_SIZE) / 2 &&
j >= (N - SQUARE_SIZE) / 2 && j < (N + SQUARE_SIZE) / 2) {
u_old[idx] = 1.0;
} else {
u_old[idx] = 0.0;
}
}
}
// // Writing the initial grid to a file
// GridToFile("heat_0000.txt", u_old);
// Allocating memory on the GPU
cudaMalloc((void**)&d_u_old, N * N * sizeof(float));
cudaMalloc((void**)&d_u_new, N * N * sizeof(float));
// Copying the initial grid to the GPU
cudaMemcpy(d_u_old, u_old, N * N * sizeof(float), cudaMemcpyHostToDevice);
// Threaed block size and thread block's grid size
dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE);
dim3 dimGrid((N-1) / (dimBlock.x + 1), (N-1) / (dimBlock.y + 1));
// Declaring the variables to measure execssution time
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Starting the timer
cudaEventRecord(start);
// Launching the kernel
for (int t = 0; t <= numSteps; t++) {
heatEquation<<<dimGrid, dimBlock>>>(d_u_old, d_u_new, dx2, dy2, dt);
// Swap the old and new grids
float* temp = d_u_new;
d_u_new = d_u_old;
d_u_old = temp;
// // Output visualization
// if (t % OutputNum == 0 && t != 0){
// // Copying the final grid back to the CPU
// cudaMemcpy(u_new, d_u_old, N * N * sizeof(float), cudaMemcpyDeviceToHost);
// // Writing the final grid to a file
// char filename[64];
// sprintf(filename, "heat_%04d.txt", t);
// GridToFile(filename, u_new);
// }
}
// Stopping the timer
cudaEventRecord(stop);
// Synchronizing threads
cudaEventSynchronize(stop);
// Saving the execution time
float exec_time = 0;
cudaEventElapsedTime(&exec_time, start, stop);
exec_time /= 1000; //Execution time in seconds
// Copying the final grid back to the CPU
cudaMemcpy(u_new, d_u_old, N * N * sizeof(float), cudaMemcpyDeviceToHost);
// Showing the parallel execution time
printf("Parallel execution time: %f s \n", exec_time);
// Free memory
free(u_old);
free(u_new);
cudaFree(d_u_old);
cudaFree(d_u_new);
return 0;
}