diff --git a/HW3/HW3.pdf b/HW3/HW3.pdf new file mode 100644 index 00000000..2f8cef3f Binary files /dev/null and b/HW3/HW3.pdf differ diff --git a/HW3/P2/mandelbrot.cl b/HW3/P2/mandelbrot.cl index 5a11c020..65b6c564 100644 --- a/HW3/P2/mandelbrot.cl +++ b/HW3/P2/mandelbrot.cl @@ -10,10 +10,35 @@ mandelbrot(__global __read_only float *coords_real, float c_real, c_imag; float z_real, z_imag; + float newZr, newZi; + int iter; + // if inside of the boundaries then do the computation if ((x < w) && (y < h)) { - // YOUR CODE HERE - ; + + // initializing the variables + z_real = 0; + z_imag = 0; + c_real = coords_real[(y * w) + x]; + c_imag = coords_imag[(y * w) + x]; + + // generate the mandelbrot set + for (iter = 0; iter < max_iter; iter++) { + + // stop iterating once the squared magnitude |z|^2 exceeds 4 (i.e. |z| > 2) + if (z_real*z_real + z_imag*z_imag > 4.0) { + break; + } + + // compute the real and imaginary parts of z*z separately, then add c + newZr = (z_real*z_real) - (z_imag*z_imag); + newZi = 2*(z_imag*z_real); + z_real = newZr + c_real; + z_imag = newZi + c_imag; + + } + // Transfer iteration data to output + out_counts[(y * w) + x] = iter; } } diff --git a/HW3/P3/P3.txt b/HW3/P3/P3.txt new file mode 100644 index 00000000..a3ba2ef4 --- /dev/null +++ b/HW3/P3/P3.txt @@ -0,0 +1,25 @@ +The platforms detected are: +--------------------------- +Apple Apple version: OpenCL 1.2 (Sep 21 2015 19:24:11) +The devices detected on platform Apple are: +--------------------------- +Intel(R) Core(TM) i7-5557U CPU @ 3.10GHz [Type: CPU ] +Maximum clock Frequency: 3100 MHz +Maximum allocable memory size: 4294 MB +Maximum work group size 1024 +--------------------------- +Intel(R) Iris(TM) Graphics 6100 [Type: GPU ] +Maximum clock Frequency: 1100 MHz +Maximum allocable memory size: 402 MB +Maximum work group size 256 +--------------------------- +This context is associated with 2 devices +The queue is using the device: Intel(R) Iris(TM) Graphics 6100 +The device memory bandwidth is 
10.5842506351 GB/s +The host-device bandwidth is 8.8087100525 GB/s + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Best configuration and time for hardware: +configuration ('coalesced', 64, 64): 0.0030028 seconds + diff --git a/HW3/P3/sum.cl b/HW3/P3/sum.cl index ee914740..7dc97144 100644 --- a/HW3/P3/sum.cl +++ b/HW3/P3/sum.cl @@ -5,11 +5,15 @@ __kernel void sum_coalesced(__global float* x, { float sum = 0; size_t local_id = get_local_id(0); + size_t i = get_global_id(0); + int globalSize = get_global_size(0); + int localSize = get_local_size(0); + int ii; // thread i (i.e., with i = get_global_id()) should add x[i], // x[i + get_global_size()], ... up to N-1, and store in sum. - for (;;) { // YOUR CODE HERE - ; // YOUR CODE HERE + for (ii = 0; i + ii * globalSize < N; ii++) { + sum += x[i+ii*globalSize]; } fast[local_id] = sum; @@ -24,8 +28,11 @@ __kernel void sum_coalesced(__global float* x, // You can assume get_local_size(0) is a power of 2. // // See http://www.nehalemlabs.net/prototype/blog/2014/06/16/parallel-programming-with-opencl-and-python-parallel-reduce/ - for (;;) { // YOUR CODE HERE - ; // YOUR CODE HERE + for (ii = localSize/2; ii>0; ii >>=1) { + if(local_id0 ; ii>>=1) { + if(local_id 0) { + // set each pixel > 0 to its linear index + labels[y * w + x] = y * w + x; + } else { + // out of bounds, set to maximum + labels[y * w + x] = w * h; + } + } +} + +int +get_clamped_value(__global __read_only int *labels, + int w, int h, + int x, int y) +{ + if ((x < 0) || (x >= w) || (y < 0) || (y >= h)) + return w * h; + return labels[y * w + x]; +} + +__kernel void +propagate_labels(__global __read_write int *labels, + __global __write_only int *changed_flag, + __local int *buffer, + int w, int h, + int buf_w, int buf_h, + const int halo) +{ + // halo is the additional number of cells in one direction + + // Global position of output pixel + const int x = get_global_id(0); + const int y = get_global_id(1); + + // Local position relative to (0, 0) 
in workgroup + const int lx = get_local_id(0); + const int ly = get_local_id(1); + + // coordinates of the upper left corner of the buffer in image + // space, including halo + const int buf_corner_x = x - lx - halo; + const int buf_corner_y = y - ly - halo; + + // coordinates of our pixel in the local buffer + const int buf_x = lx + halo; + const int buf_y = ly + halo; + + // 1D index of thread within our work-group + const int idx_1D = ly * get_local_size(0) + lx; + + int old_label; + // Will store the output value + int new_label; + + int upNeighbor,rightNeighbor,downNeighbor,leftNeighbor,minNeighbors; + + // Load the relevant labels to a local buffer with a halo + if (idx_1D < buf_w) { + for (int row = 0; row < buf_h; row++) { + buffer[row * buf_w + idx_1D] = + get_clamped_value(labels, + w, h, + buf_corner_x + idx_1D, buf_corner_y + row); + } + } + + // Make sure all threads reach the next part after + // the local buffer is loaded + barrier(CLK_LOCAL_MEM_FENCE); + + // Fetch the value from the buffer the corresponds to + // the pixel for this thread + old_label = buffer[buf_y * buf_w + buf_x]; + + // CODE FOR PARTS 2 and 4 HERE (part 4 will replace part 2) + + // stay in bounds + if ((x < w) && (y < h)) { + // CODE FOR PART 1 HERE + // We set new_label to the value of old_label, but you will need + // to adjust this for correctness. 
+ new_label = old_label; + + if (old_label 0) { + // set each pixel > 0 to its linear index + labels[y * w + x] = y * w + x; + } else { + // out of bounds, set to maximum + labels[y * w + x] = w * h; + } + } +} + +int +get_clamped_value(__global __read_only int *labels, + int w, int h, + int x, int y) +{ + if ((x < 0) || (x >= w) || (y < 0) || (y >= h)) + return w * h; + return labels[y * w + x]; +} + +__kernel void +propagate_labels(__global __read_write int *labels, + __global __write_only int *changed_flag, + __local int *buffer, + int w, int h, + int buf_w, int buf_h, + const int halo) +{ + // halo is the additional number of cells in one direction + + // Global position of output pixel + const int x = get_global_id(0); + const int y = get_global_id(1); + + // Local position relative to (0, 0) in workgroup + const int lx = get_local_id(0); + const int ly = get_local_id(1); + + // coordinates of the upper left corner of the buffer in image + // space, including halo + const int buf_corner_x = x - lx - halo; + const int buf_corner_y = y - ly - halo; + + // coordinates of our pixel in the local buffer + const int buf_x = lx + halo; + const int buf_y = ly + halo; + + // 1D index of thread within our work-group + const int idx_1D = ly * get_local_size(0) + lx; + + int old_label; + // Will store the output value + int new_label; + + int upNeighbor,rightNeighbor,downNeighbor,leftNeighbor,minNeighbors; + + // Load the relevant labels to a local buffer with a halo + if (idx_1D < buf_w) { + for (int row = 0; row < buf_h; row++) { + buffer[row * buf_w + idx_1D] = + get_clamped_value(labels, + w, h, + buf_corner_x + idx_1D, buf_corner_y + row); + } + } + + // Make sure all threads reach the next part after + // the local buffer is loaded + barrier(CLK_LOCAL_MEM_FENCE); + + // Fetch the value from the buffer the corresponds to + // the pixel for this thread + old_label = buffer[buf_y * buf_w + buf_x]; + + // CODE FOR PARTS 2 and 4 HERE (part 4 will replace part 2) + + // 
stay in bounds + if ((x < w) && (y < h)) { + // CODE FOR PART 1 HERE + // We set new_label to the value of old_label, but you will need + // to adjust this for correctness. + new_label = old_label; + + if (old_label < w * h) + { + buffer[buf_y * buf_w + buf_x] = labels[old_label]; + } + + if (old_label 0) { + // set each pixel > 0 to its linear index + labels[y * w + x] = y * w + x; + } else { + // out of bounds, set to maximum + labels[y * w + x] = w * h; + } + } +} + +int +get_clamped_value(__global __read_only int *labels, + int w, int h, + int x, int y) +{ + if ((x < 0) || (x >= w) || (y < 0) || (y >= h)) + return w * h; + return labels[y * w + x]; +} + +__kernel void +propagate_labels(__global __read_write int *labels, + __global __write_only int *changed_flag, + __local int *buffer, + int w, int h, + int buf_w, int buf_h, + const int halo) +{ + // halo is the additional number of cells in one direction + + // Global position of output pixel + const int x = get_global_id(0); + const int y = get_global_id(1); + + // Local position relative to (0, 0) in workgroup + const int lx = get_local_id(0); + const int ly = get_local_id(1); + + // coordinates of the upper left corner of the buffer in image + // space, including halo + const int buf_corner_x = x - lx - halo; + const int buf_corner_y = y - ly - halo; + + // coordinates of our pixel in the local buffer + const int buf_x = lx + halo; + const int buf_y = ly + halo; + + // 1D index of thread within our work-group + const int idx_1D = ly * get_local_size(0) + lx; + + int old_label; + // Will store the output value + int new_label; + + int upNeighbor,rightNeighbor,downNeighbor,leftNeighbor,minNeighbors; + + // Load the relevant labels to a local buffer with a halo + if (idx_1D < buf_w) { + for (int row = 0; row < buf_h; row++) { + buffer[row * buf_w + idx_1D] = + get_clamped_value(labels, + w, h, + buf_corner_x + idx_1D, buf_corner_y + row); + } + } + + // Make sure all threads reach the next part after + // the 
local buffer is loaded + barrier(CLK_LOCAL_MEM_FENCE); + + // Fetch the value from the buffer the corresponds to + // the pixel for this thread + old_label = buffer[buf_y * buf_w + buf_x]; + + // CODE FOR PARTS 2 and 4 HERE (part 4 will replace part 2) + + // stay in bounds + if ((x < w) && (y < h)) { + // CODE FOR PART 1 HERE + // We set new_label to the value of old_label, but you will need + // to adjust this for correctness. + new_label = old_label; + + if (old_label < w * h) + { + buffer[buf_y * buf_w + buf_x] = labels[old_label]; + } + + if (old_label 0) { + // set each pixel > 0 to its linear index + labels[y * w + x] = y * w + x; + } else { + // out of bounds, set to maximum + labels[y * w + x] = w * h; + } + } +} + +int +get_clamped_value(__global __read_only int *labels, + int w, int h, + int x, int y) +{ + if ((x < 0) || (x >= w) || (y < 0) || (y >= h)) + return w * h; + return labels[y * w + x]; +} + +__kernel void +propagate_labels(__global __read_write int *labels, + __global __write_only int *changed_flag, + __local int *buffer, + int w, int h, + int buf_w, int buf_h, + const int halo) +{ + // halo is the additional number of cells in one direction + + // Global position of output pixel + const int x = get_global_id(0); + const int y = get_global_id(1); + + // Local position relative to (0, 0) in workgroup + const int lx = get_local_id(0); + const int ly = get_local_id(1); + + // coordinates of the upper left corner of the buffer in image + // space, including halo + const int buf_corner_x = x - lx - halo; + const int buf_corner_y = y - ly - halo; + + // coordinates of our pixel in the local buffer + const int buf_x = lx + halo; + const int buf_y = ly + halo; + + // 1D index of thread within our work-group + const int idx_1D = ly * get_local_size(0) + lx; + + int old_label; + // Will store the output value + int new_label; + + int upNeighbor,rightNeighbor,downNeighbor,leftNeighbor,minNeighbors; + + int gpartent, xx, yy; + // Load the relevant labels 
to a local buffer with a halo + if (idx_1D < buf_w) { + for (int row = 0; row < buf_h; row++) { + buffer[row * buf_w + idx_1D] = + get_clamped_value(labels, + w, h, + buf_corner_x + idx_1D, buf_corner_y + row); + } + } + + // Make sure all threads reach the next part after + // the local buffer is loaded + barrier(CLK_LOCAL_MEM_FENCE); + + // Fetch the value from the buffer that corresponds to + // the pixel for this thread + old_label = buffer[buf_y * buf_w + buf_x]; + + // CODE FOR PARTS 2 and 4 HERE (part 4 will replace part 2) + // Only the work-item at local position (0, 0) of each work-group runs this section + if ((lx==0) && (ly==0)){ + + if(old_label