From 84fe333a98542fa2e5ad064da3e09b25ab508ed2 Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sat, 17 Sep 2022 19:56:02 -0400 Subject: [PATCH 1/7] update --- src/main.cpp | 20 ++--- stream_compaction/common.cu | 20 ++++- stream_compaction/cpu.cu | 57 +++++++++++++- stream_compaction/efficient.cu | 137 +++++++++++++++++++++++++++++++-- stream_compaction/efficient.h | 2 +- stream_compaction/naive.cu | 53 +++++++++++++ stream_compaction/thrust.cu | 17 +++- 7 files changed, 282 insertions(+), 24 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 896ac2b..0ed7805 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -51,7 +51,7 @@ int main(int argc, char* argv[]) { printDesc("naive scan, power-of-two"); StreamCompaction::Naive::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan @@ -64,35 +64,35 @@ int main(int argc, char* argv[]) { printDesc("naive scan, non-power-of-two"); StreamCompaction::Naive::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(NPOT, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); + StreamCompaction::Efficient::scan(SIZE, c, a,false); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); + StreamCompaction::Efficient::scan(NPOT, c, a,false); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); StreamCompaction::Thrust::scan(SIZE, c, a); printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("thrust scan, non-power-of-two"); StreamCompaction::Thrust::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); printf("\n"); @@ -137,14 +137,14 @@ int main(int argc, char* argv[]) { printDesc("work-efficient compact, power-of-two"); count = StreamCompaction::Efficient::compact(SIZE, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); zeroArray(SIZE, c); printDesc("work-efficient compact, non-power-of-two"); count = StreamCompaction::Efficient::compact(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); system("pause"); // stop Win32 console from closing on exit diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index 2ed6d63..91f4fef 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -22,8 +22,16 @@ namespace StreamCompaction { * Maps an array to an array of 0s and 1s for stream compaction. Elements * which map to 0 will be removed, and elements which map to 1 will be kept. */ - __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { + __global__ void kernMapToBoolean(int n, int *bools, const int *idata) + { // TODO + //find each index + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) + { + return; + } + bools[index] = idata[index] != 0 ? 1 : 0; } /** @@ -33,6 +41,16 @@ namespace StreamCompaction { __global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices) { // TODO + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) + { + return; + } + if (bools[index] !=0) + { + int targetIdx = indices[index]; + odata[targetIdx] = idata[index]; + } } } diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 719fa11..ac69568 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -15,11 +15,19 @@ namespace StreamCompaction { /** * CPU scan (prefix sum). * For performance analysis, this is supposed to be a simple for loop. - * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function first. + * (Optional) For better understanding before starting moving to GPU, + you can simulate your GPU scan in this function first. */ void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO + odata[0] = idata[0]; + for (int i = 1; i < n; i++) + { + odata[i] = odata[i - 1] + idata[i]; + } + + //Why the last two digit different? timer().endCpuTimer(); } @@ -28,11 +36,22 @@ namespace StreamCompaction { * * @returns the number of elements remaining after compaction. */ + //Well I don't know exactly the condition + //So I treat it as remove 0 I guess int compactWithoutScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO + int j = 0; + for (int i = 0; i < n; i++) + { + if (idata[i] > 0) + { + odata[j] = idata[i]; + j++; + } + } timer().endCpuTimer(); - return -1; + return j; } /** @@ -41,10 +60,42 @@ namespace StreamCompaction { * @returns the number of elements remaining after compaction. */ int compactWithScan(int n, int *odata, const int *idata) { + int* boolArray = new int[n * sizeof(int)]; + int* scanArray = new int[n * sizeof(int)]; timer().startCpuTimer(); // TODO + for (int i = 0; i < n; i++) + { + boolArray[i] = (idata[i] > 0) ? 1 : 0; + } + //Set temp array + + //begin scan + //Inclusive scan + scanArray[0] = boolArray[0]; //identity + for (int i = 1; i < n; i++) + { + scanArray[i] = scanArray[i-1] + boolArray[i]; + } + int elementNum = scanArray[n - 1]; + //Shift to right + //Exclusive scan + for (int i = n; i > 0; i--) + { + scanArray[i] = scanArray[i - 1]; + } + scanArray[0] = 0; + //Scatter + for (int i = 0; i < n; i++) + { + if (boolArray[i] > 0) + { + odata[scanArray[i]]=idata[i]; + } + } timer().endCpuTimer(); - return -1; + + return elementNum; } } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 2db346e..1f440c7 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -6,21 +6,106 @@ namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; + using StreamCompaction::Common::kernMapToBoolean; + using StreamCompaction::Common::kernScatter; PerformanceTimer& timer() { static PerformanceTimer timer; return timer; } +#define blockSize 256 + __global__ void KernUpSweep(int n, int* data,int d) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + //real offset is 2^power + if (index >= n) + return; + int pow1 = 1 << (d + 1); + int pow2 = 1 << d; + if (index % pow1 == 0) + { + data[index + pow1 - 1] += data[index + pow2 - 1]; + } + } + + __global__ void KernDownSweep(int n,int* data,int d) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) + return; + int pow1 = 1 << (d + 1); + int pow2 = 1 << d; + if (index % pow1 == 0) + { + int t = data[index + pow2 - 1]; + data[index + pow2 - 1] = data[index + pow1 - 1]; + data[index + pow1 - 1] += t; + } + } + + + //set n-1 =0 + + __global__ void KernSetZero(int n,int* idata) + { + idata[n - 1] = 0; + } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ - void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); + void scan(int n, int *odata, const int *idata, bool gpuTimerStart) { + int* dev_data; + int log2n = ilog2ceil(n); + //input array may not be two power + //So need to get nearest two power + int nearest_2power = 1 << log2n; + int finalMemorySize = nearest_2power; + + dim3 fullBlocksPergrid((finalMemorySize + blockSize - 1) / blockSize); + + //allocate cuda memoty + cudaMalloc((void**)&dev_data, finalMemorySize * sizeof(int)); + checkCUDAError("cudaMalloc dev_data failed!"); + + cudaMemset(dev_data, 0, finalMemorySize * sizeof(int)); + checkCUDAError("cudaMemset dev_data failed!"); + + cudaMemcpy(dev_data, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_data failed!"); + + if (gpuTimerStart == false) + { + timer().startGpuTimer(); + } + // TODO - timer().endGpuTimer(); + int real_d = ilog2ceil(finalMemorySize); + //upsweep + for (int d = 0; d <= real_d - 1; d++) + { + KernUpSweep << > > (finalMemorySize,dev_data,d); + checkCUDAError("KernupSweep failed!"); + + } + //down Sweep + KernSetZero << < 1, 1 >> > (finalMemorySize, dev_data); + for (int d = real_d - 1; d >= 0; d--) + { + KernDownSweep << > > (nearest_2power,dev_data,d); + checkCUDAError("KernDownSweep failed!"); + } + + if (gpuTimerStart == false) + { + timer().endGpuTimer(); + } + cudaMemcpy(odata, dev_data, n * sizeof(int), cudaMemcpyDeviceToHost); + cudaFree(dev_data); } + + /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. @@ -31,10 +116,52 @@ namespace StreamCompaction { * @returns The number of elements remaining after compaction. */ int compact(int n, int *odata, const int *idata) { + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + int* count = new int[2]; + + int* dev_idata; + int* dev_odata; + int* dev_bool; + int* dev_boolScan; + + cudaMalloc((void**)&dev_idata, n * sizeof(int)); + checkCUDAError("CUDA Malloc dev_idata failed!"); + cudaMalloc((void**)&dev_bool, n * sizeof(int)); + checkCUDAError("CUDA Malloc dev_bool failed!"); + cudaMalloc((void**)&dev_boolScan, n * sizeof(int)); + checkCUDAError("CUDA Malloc dev_boolScan failed!"); + cudaMalloc((void**)&dev_odata, n * sizeof(int)); + checkCUDAError("CUDA Malloc dev_odata failed!"); + + cudaMemcpy(dev_idata, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + timer().startGpuTimer(); - // TODO + + kernMapToBoolean << > > (n, dev_bool, dev_idata); + checkCUDAError("kernMapToBoolean failed!"); + + scan(n, dev_boolScan, dev_bool,true); + + kernScatter << > > (n, dev_odata, dev_idata, dev_bool, dev_boolScan); + checkCUDAError("kernScatter failed!"); + timer().endGpuTimer(); - return -1; + + cudaMemcpy(count, dev_boolScan + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + + //size equals to last of boolean array and last of boolean prefix sum array + int compactedSize = count[0] ; + + cudaMemcpy(odata, dev_odata, sizeof(int) * compactedSize, cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy back failed!"); + + cudaFree(dev_idata); + cudaFree(dev_bool); + cudaFree(dev_boolScan); + cudaFree(dev_odata); + + return compactedSize; } } } diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index 803cb4f..18b0f55 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -6,7 +6,7 @@ namespace StreamCompaction { namespace Efficient { StreamCompaction::Common::PerformanceTimer& timer(); - void scan(int n, int *odata, const int *idata); + void scan(int n, int *odata, const int *idata,bool gpuTimer); int compact(int n, int *odata, const int *idata); } diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 4308876..3f51ddc 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -13,13 +13,66 @@ namespace StreamCompaction { } // TODO: __global__ + __global__ void KernNaiveScan(int n,int d,int* odata,const int* idata) + { + //for all k in parallel + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) + { + return; + } + //offset: 2^d + // 2^(offset-1) + int d_offset = 1 << (d - 1); + + int beginIndex = index - d_offset; + int prevData = beginIndex < 0 ? 0 : idata[beginIndex]; + odata[index] = idata[index] + prevData; + + } + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ + void scan(int n, int *odata, const int *idata) { + int blockSize = 256; + dim3 BlocksPergrid(n + blockSize - 1 / blockSize); + //This need to be parallel + int* dev_idata; + int* dev_odata; + //allocate memory + cudaMalloc((void**)&dev_idata, sizeof(int) * n); + checkCUDAError("cudaMalloc dev_idata failed!"); + cudaMalloc((void**)&dev_odata, sizeof(int) * n); + checkCUDAError("cudaMalloc dev_odata failed!"); + //Copy memory from CPU to gpu + cudaMemcpy(dev_idata,idata,(n)*sizeof(int),cudaMemcpyHostToDevice); + cudaMemcpy(dev_odata, idata, (n) * sizeof(int), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); + //From host to devicw + int log2n = ilog2ceil(n); + timer().startGpuTimer(); // TODO + for (int d = 1; d <= log2n; d++) + { + + KernNaiveScan << > > (n,d,dev_odata,dev_idata); + cudaDeviceSynchronize(); + //ping pong buffers + int *dev_temp = dev_idata; + dev_idata = dev_odata; + dev_odata = dev_temp; + } timer().endGpuTimer(); + //Exclusive scan, so need right shift. + + //copy back to host + cudaMemcpy(odata , dev_idata, (n) * sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy odata failed!"); + cudaFree(dev_idata); + cudaFree(dev_odata); } } } diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 1def45e..fded128 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -17,12 +17,21 @@ namespace StreamCompaction { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ - void scan(int n, int *odata, const int *idata) { + void scan(int n, int* odata, const int* idata) { + thrust::host_vector host_data(n); + for (int i = 0; i < n; i++) + { + host_data[i] = idata[i]; + } + + thrust::device_vector dev_data = host_data; timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(dev_data.begin(), dev_data.end(), dev_data.begin()); timer().endGpuTimer(); + for (int i = 0; i < n; i++) + { + odata[i] = dev_data[i]; + } } } } From 61fa85fa129fb10528db23aca90a0249a04304b7 Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sun, 18 Sep 2022 13:11:21 -0400 Subject: [PATCH 2/7] compaction upd --- README.md | 7 ++++--- stream_compaction/efficient.cu | 18 +++++++++++++++--- stream_compaction/thrust.cu | 6 ++---- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0e38ddb..32e2900 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ CUDA Stream Compaction **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. +* Hanlin Sun + * [LinkedIn](https://www.linkedin.com/in/hanlin-sun-7162941a5/), + * [personal website](https://hanlinsun.github.io/) * Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) -### (TODO: Your README) +### ReadMe Include analysis, etc. (Remember, this is public, so don't put anything here that you don't want to share with the world.) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 1f440c7..10e9978 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -54,13 +54,17 @@ namespace StreamCompaction { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ + void scan(int n, int *odata, const int *idata, bool gpuTimerStart) { int* dev_data; + int* dev_buffer; + int log2n = ilog2ceil(n); //input array may not be two power //So need to get nearest two power int nearest_2power = 1 << log2n; int finalMemorySize = nearest_2power; + int difference = finalMemorySize-n; dim3 fullBlocksPergrid((finalMemorySize + blockSize - 1) / blockSize); @@ -68,12 +72,18 @@ namespace StreamCompaction { cudaMalloc((void**)&dev_data, finalMemorySize * sizeof(int)); checkCUDAError("cudaMalloc dev_data failed!"); + cudaMalloc((void**)&dev_buffer, finalMemorySize * sizeof(int)); + checkCUDAError("cudaMemset dev_buffer failed!"); + cudaMemset(dev_data, 0, finalMemorySize * sizeof(int)); checkCUDAError("cudaMemset dev_data failed!"); cudaMemcpy(dev_data, idata, n * sizeof(int), cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy dev_data failed!"); + cudaMemcpy(dev_buffer, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_buffer failed!"); + if (gpuTimerStart == false) { timer().startGpuTimer(); @@ -95,7 +105,7 @@ namespace StreamCompaction { KernDownSweep << > > (nearest_2power,dev_data,d); checkCUDAError("KernDownSweep failed!"); } - + if (gpuTimerStart == false) { timer().endGpuTimer(); @@ -148,10 +158,12 @@ namespace StreamCompaction { timer().endGpuTimer(); - cudaMemcpy(count, dev_boolScan + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(count, dev_bool + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + + cudaMemcpy(count+1, dev_boolScan + n - 1, sizeof(int), cudaMemcpyDeviceToHost); //size equals to last of boolean array and last of boolean prefix sum array - int compactedSize = count[0] ; + int compactedSize = count[0] + count[1]; cudaMemcpy(odata, dev_odata, sizeof(int) * compactedSize, cudaMemcpyDeviceToHost); checkCUDAError("cudaMemcpy back failed!"); diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index fded128..e766c7e 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -28,10 +28,8 @@ namespace StreamCompaction { timer().startGpuTimer(); thrust::exclusive_scan(dev_data.begin(), dev_data.end(), dev_data.begin()); timer().endGpuTimer(); - for (int i = 0; i < n; i++) - { - odata[i] = dev_data[i]; - } + + thrust::copy(dev_data.begin(), dev_data.end(), odata); } } } From 203af496651095ad649a5ff2e0750bf197390daa Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sun, 18 Sep 2022 17:48:59 -0400 Subject: [PATCH 3/7] bug fix --- README.md | 2 +- stream_compaction/cpu.cu | 4 ++-- stream_compaction/naive.cu | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 32e2900..aabf195 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ CUDA Stream Compaction * Hanlin Sun * [LinkedIn](https://www.linkedin.com/in/hanlin-sun-7162941a5/), * [personal website](https://hanlinsun.github.io/) -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Tested on: Windows 10, i7-8750H @ 3.2GHz 32GB, NVIDIA Quadro P3200 ### ReadMe diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index ac69568..e4e9817 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -21,10 +21,10 @@ namespace StreamCompaction { void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO - odata[0] = idata[0]; + odata[0] = 0; for (int i = 1; i < n; i++) { - odata[i] = odata[i - 1] + idata[i]; + odata[i] = odata[i-1] + idata[i-1]; } //Why the last two digit different? diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 3f51ddc..d27a92d 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -13,6 +13,24 @@ namespace StreamCompaction { } // TODO: __global__ + __global__ void KernShiftRight(int* idata, int* odata, int n) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > n - 1) + { + return; + } + if (index == 0) + { + odata[index] = 0; + return; + } + odata[index] = idata[index - 1]; + + } + + + __global__ void KernNaiveScan(int n,int d,int* odata,const int* idata) { //for all k in parallel From c158fe99cb489cbc77c9671fe89e30f95718890e Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sun, 18 Sep 2022 21:56:50 -0400 Subject: [PATCH 4/7] readme --- README.md | 88 +++++++++++++++++++++++++++++++++++-- img/Analysis 1.JPG | Bin 0 -> 57610 bytes src/main.cpp | 2 +- stream_compaction/naive.cu | 50 +++++++++++++++++---- 4 files changed, 127 insertions(+), 13 deletions(-) create mode 100644 img/Analysis 1.JPG diff --git a/README.md b/README.md index aabf195..a86d96c 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,90 @@ CUDA Stream Compaction * [personal website](https://hanlinsun.github.io/) * Tested on: Windows 10, i7-8750H @ 3.2GHz 32GB, NVIDIA Quadro P3200 -### ReadMe +### Stream Compaction +This Project involves: -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +* CPU version of scan +* COU version of scan without using scan +* CPU version of compact with scan +* GPU version of naive scan +* GPU version of work-efficient scan +* GPU version of String Compact scan +These three CPU implements was used to test whether GPU implements was right. I have collected the data across 8 executions with different array sizes to collect the data. +This program generates a new array of random values with each execution, where the size of array is customisable. I have varied the size of the arrays by powers of two, starting from 2^8^ all the wai to 2^20^. The program also executes each algorithm for arrays of size "non- power of two" which are generated truncating the "power of two" arrays. + + +### Output Results + +**************** +** SCAN TESTS ** +**************** + [ 21 28 22 23 38 18 20 9 44 26 14 10 3 ... 25 0 ] +==== cpu scan, power-of-two ==== + elapsed time: 0.0015ms (std::chrono Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6273 6298 ] +==== cpu scan, non-power-of-two ==== + elapsed time: 0.0014ms (std::chrono Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6223 6226 ] + passed +==== naive scan, power-of-two ==== + elapsed time: 0.25088ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 276 ... 6298 ] + passed +==== naive scan, non-power-of-two ==== + elapsed time: 0.23552ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 276 ... 0 ] + passed +==== work-efficient scan, power-of-two ==== + elapsed time: 0.185344ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6273 6298 ] + passed +==== work-efficient scan, non-power-of-two ==== + elapsed time: 0.185344ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6223 6226 ] + passed +==== thrust scan, power-of-two ==== + elapsed time: 13.1092ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6273 6298 ] + passed +==== thrust scan, non-power-of-two ==== + elapsed time: 2.18214ms (CUDA Measured) + [ 0 21 49 71 94 132 150 170 179 223 249 263 273 ... 6223 6226 ] + passed + +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 1 2 2 3 2 2 0 3 0 0 0 0 1 ... 3 0 ] +==== cpu compact without scan, power-of-two ==== + elapsed time: 0.0014ms (std::chrono Measured) + [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 3 ] + passed +==== cpu compact without scan, non-power-of-two ==== + elapsed time: 0.0013ms (std::chrono Measured) + [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 1 ] + passed +==== cpu compact with scan ==== + elapsed time: 0.0038ms (std::chrono Measured) + [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 3 ] + passed +==== work-efficient compact, power-of-two ==== + elapsed time: 0.309248ms (CUDA Measured) + [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 3 ] + passed +==== work-efficient compact, non-power-of-two ==== + elapsed time: 0.274432ms (CUDA Measured) + [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 1 ] + passed + +### Performance Analysis + +* Scanning Algorithm + +![Scan Chart](img/Analysis%201.JPG) +In this chart, the lower the attribute is, the better its performance. +In this chart we can see that when the array number is generally small, the performance gap between naive method and work-efficient method is generally small, but with the array length increased, this gap become larger and larger. +The reason why the second method is more efficient is that even though we have limited the number of active threads, when we are doing naive scan, threads which are not doing anything have to wait for the other active threads in the warp to finish to become available again. +But in upsweep and downsweep method, upsweep only use half number of threads to finish the work, and the rest of threads can be utilized by the GPU to do other tasks(like downsweep). So through that method we launch the same number of threads, but use less depth than the naive method. +That's why it is more faster. \ No newline at end of file diff --git a/img/Analysis 1.JPG b/img/Analysis 1.JPG new file mode 100644 index 0000000000000000000000000000000000000000..de69e2ac2ed490305605cd69dc0248faea9dd059 GIT binary patch literal 57610 zcmeEv2V4|M_J0!u6cj-akf@*_NX{8l@*+7Wl^lkQBw;`WB_{zTN6Arg6hV+I8Oa#| z$vF-G#$939bJyMf@AmHYPW-e}Gu74I@71fSSKk-?NCJZ1TAOLs4f55@$ z`2it2Lp=bHlmw^&06+tf5x4fgc?BU*rHAD?1}Q0G#qi`l3hRfcip|Kk`vI zH!$E!8Hs@j00IM#%f~teTHqnye&`hci_-Ih<{upR!GRwf_`!i69606x69W?y7Xv33 zBL^81BNq!Z7b6q!t=|BE4PXV>0Y>mRL^sg!4Xmxrx#;Olt?2IQnBCW<(>8<9+ubv# zXQX4G2YC4H%spiD*VQ*P<)vIKt)wI~)ZwL6VwGf&G{2>5U?}QfsVnDjM_${( zSesLalArJF86G<>JBT?%*ZLlr9mK@cip!4o%9pEif#t)W>96qdSnB9;$=(+JzXl`w2 zW=eK=!F%`3Y^-^&*w`5AaOvICW4*7V!%BPq9j2ujQ zjQ6kna(f-^qr02iSeksfw~jWwu8A&0*VNhy78Ie{~)Y{UI~p9`v94j%636bJs)$v>p)Z^o`;EmzkRRBdfR9rmCfxCNk| zI)!oy85QLe3K}XZ+8Hc#P{%lfi+LUc3m^9q0X{B19w7&SZIj!NOMRCR{%sT1SBkk zgBpMov^kyvt(1p`%3uBv5Rs5ip`fCjK7$UPkbfRPL_k79L`FJw3K=v}A~=Ed05aAo z?8}UTC>Lbzp z)@>nS5m7O5Ie7&|C1n*=Z5>@beFH-yD{Gqvws!UokDs`?KYixm85s0DI3)DN%doik zgv3`#$tkJ1uk-Q?3X6(M-d5MVtF3!q-_YLC+11_C`>}6yY#0)S|=z09D&FQl&zq ze@(fWcmPypz*wSnyv)spOp$=?jJmmbIT-;grN>qnrC3HD;y7 zLBNB%BVSYWP$&#mxYd~_jjVNaM}woslWh|?zaL0jRcT*@fizV}nw3uv*Zhw?@X_O? z7{s7ee7$4a(c>k?!SO?EL$nZOe` zCI89nBHOCPEctG1`HINH2WcCs3I);HxEY0E_D0;RK&i> zj^OgJdwhK(5q|W3_hJ9^jiD3O1DkjMWH*rmv&3O z^`o=&>FtsTD3W~Q!fFk}IINTucI^e0Vde7t7A4Fj-!f$eQqn`t|CSRUb}H?;3ssya z@V|B>r9kTnM-G4;B!z~gRJMNghy&pKi2l~4MD~Ekn|k1iiV4Gc<0W|j_?cDluboN@ zmO3u~vsm_6fXKUoqI*5#&O%4kh9czD^NOE(b{=Q@B#_!;DFykdkFQax35D0FO?lNQ zb|V?N`#KWfr$KWS+yxgS#)`9m*OX7!>Ca^h_p*jdq&#_&`r4;CfJ|}J6?I1GA-8?H z&^WD@NSBRXXIy@Pr|4kD__IeJTL%z>+8$O)Jf54|>22(6$G<3Fwy0J#{de2yK=RwRjdzc==$a$pB95JeGM6L7IOYC1 ziu@8@sRlcYK_*_iIfd>ie(bG|YRQcLTF7+!r3FGm;R)tI=9Q=-O(FEc_16agnbQG~ z89imIxD)I%__8#=tT5`M)yw!`6%?+$H)Aig(`HIu9RQ!3Z+5RX?j8X8kvjeBRUDa) z@31~OkhopB<^i$UXeqeV_@2{ht@n+rY4uo@6m?sdGF~t3%pg0TE%wvv)NK=C0Ztxl&Z;!&X55?MR@7?e7_G2h7Dp0{?e@8Ha zU59@|WKuq3omRYV43apN6fc&jHg|qMU~@o(%3kiqQ>r`X_%scCvwC(802l239ave5 z->uGtMIMDQ8t#{c{U0R|--HYFUgC)osGsi6AjP$_6Pt{5{AoC{)cGTc)r@x!R))mn zt>yC`Wr2!=NLKZXu7%)`T28r<_lecVw)z102*Zt*kmPth*l447MYfdTJn6RKs#SDd zIac?O*9vM}Wy;X7(T-{8>s=}7{MW;q&TU+Er89xda)gbbvEyvb%}&Y}`a^7xT3gc@ zKZkK1R&DLnt+bj|W40xY;dhP`sRLA2LFaT{hun!XmPX9Vtu}Vc6pv_Jp5a!kr9}ij?paT-jx`L^e;fYt^XFKJmn;tq>DZ8N zxWx~AbWWn&#uQH*^D?-)B;|HB41XY_%QI95^^Cy#*4WhiL^IayTr<1jdYI^7&%>L7 zA9Oz~j)o1b%1hYmxCRPYU(PL_Ch{HVo{UOMGNQj?xVcd%iB;o;+%U;k#G`b5yqvDV z;30QXxIufnh9;|BJR#1bjDWffy4Y7A4f(P5nEEu**_Yyt;LsNj=VIJaxsvcPt@Xu_I&gN?sJKo91QqeYYv;75!H@ z5z%jp{4lnl;+-su*aO2Vw=8(V+y*ptQt?xf{PFm01Eq-P=!nsOB1IfpPmeP$c)jXV zO{X)IG)ZZh z)zKZU7^E5JRc7|dI*M?avx}wlqs_Cp?4k~RN5{81`yX^?tNetf%T1c0`er)xbaW%0 zy)L&G$>hn_xc!JG?w4&!u0)UGJ);dAjO)Cs_Pi5!*s*NnVT;&Yz+<_sf+?gWz1Ge8 z-CoE=p6u-w=@Ewrq;0vmP3Ox80O}HV{bB}fg7SiCtlOYUmyZ8+U9BK7VizxK))Pbs8*W~^ac%0<#Tr-5Gi@fD&;?K-73+$w_#;&$CFS_@m+Crla= zx-8AyG5Q0g1*3lCgu*}uueeh9ttE=w99zeJVLk0Ysps~oh2>_fTRGO1L3`)kj;pkP zu6ea6o9A+~>MB!9+GGv^FtH>$jgoPTMd^G(_Xn9ud@upR}0Ph$y=Nl!%CC)i)O$Ps}pWv?>9*3U&}LHG3211 zXuls4iIS7K1>NO;`+CMO>7rw?X?l&gk{%+`0YJ-VYPGeQC6;P@Q;q{G!)~Tbx{q{E zuEL=YJ)Sa@Z?Lprd{Lh>)9+4k_nq8=l8hGrDwCSPTSl$Z_Tf22G$iD81Uyr@;|D-s zBmKUs(g7eM?|%T)YZM;<*-X$+<6_pUm7^9FmGmD5!Zq7z{dsHCw6R}e2-I78viqjh z&vJVPLe%aQmU*^2?kZ>MRe9pQyj36dLe>&`W{ihcLePHta z zvc`~_vRd9|50=`^N(ZrsD}FW0V){_S{1BpX*)o;-Je2*6fkgujVkNpl)ES zB4<_XZf#w5w;@=|S@^@ERn!@)0GZx0n=IJwj0I6l*M(+xD} zW+S6{4V<&X=iA0lB!NLE*!y>8l`6W{2I=)TqGFBpD;M_TZ2W9HDD+;jhrE41MEJ}t zS&E^x%%tCYVX%W_z1GgCwoYML$z!S;GE!RUxmmAJBt6A}=<@7e$`ca3y%J zKH+V3r?AU3|?nKAgg}#+a_$X-bb~&br3Xe*9g2-miC_~?j+gWQ$av^KdDic&M z8ij&0*9k?DZ(z&an5|I5@+YKxB*MXZAN>VgM*CQw!IWF=L?9bRIv z8i-0vZI-x~wGDl-)8A6(WSC)SyGd)NouV?(@^TP|IW2czfHMvp|MM7+=4A68H&_rw z>CBemd%GQ~JMZkf<|F4b3rE#nuK5s(`AXhob-SUyJt`iNc0LBnw`gNM!-w)#hmF)T zXR2tq*Y3-)cG#?uNU12#bliA!My%!)%;pxr6Z={pPAT>kD*MiGYTO#6ECh{Q{0J+C=~F&>pMzN%e5CXRh&W1F|kbV6CVP7EnJIh z_!zQM7qI!dpV%nM?etPeY=YsXOutw4(-%D(qSF7 z+=R_mVjW4!tY>9gy7dUKTFsx;EnQ1*wKX$Fjke;Ee&^3qPeeTtaZW1bN)Z%s#z8S{ zw7#kv$mzq25xUR)F zb}SceKkiuOv6|Nj}#zECB|E&vZgB?lo z;STj^v+8n&D(;_hFy?2SHqg8raJ6gg({wQ3=a^hRUiRy#Bp*{)v@kvmk@$27XZ!Xv zjrQuktVkE5T+L9WI@=JY8LS^S)7CrVzs=SbD8qiSR*W)GY*vyk@s*0v2EBetzlfC? z<&aMush0Kvbr%k=EfTuOlG4*W!peL@2O}X4LPZhVfalgD#Tt_poff)n`fbR20Tn|_i zg@|tuo8A{0nMb6(h%fESTV>SK+4Dg;KulgpP*R2_dQ7E- zO$>rJ>d1-x?sOFk0rCYMv^N$r7inWZ^3KuR+4UHbziWO6mE^gyVMv&MA73?Q@W#u& zF|8Mo?d)>(<4J<=)Z$#OX9nwqRnMkj?wI&LyChq)=Q3?T@MijhOv=nft{O(6^3 z<{NE$pA$747|0$gQN^6ouYnO5H)ASkoV`f1FIsDgE-DZ~hoBze)-a)SW^#V-vi;Th z&$Z za@$Svw&*Uue%k0er4%Mp?mG=Fv**$8i@SvKqJk7weQ|i1Ze=fvyveL|rj)xAM5ro=a7 z9t(BMFPde>7AS;D*Y)Mrq!-~6$XaxKuo}w68y$92ppZ$p_L1xHji+YZ6LIOE-7rG`W7jw!;RC2LGD7zXrFITmjjDninTf2t0zM*Zj=?zdC6(Cf71trvC5@FC=pV)Dn)zlN^|F4^vK@-zZK-$cQ-dRO4-ZB8q zm4d|%AOEiHxv0D_e)qLVq-HM3us5sPP5!0a6+!)ckIC-22ck;@-}YPQvCB<41j_P*a4HbFMq9O+*4!?KxeZ zB;TsImt=9_upwq>F7Ic?>_=sfqZqh5LrWB*Vl51LC-`(iEVr}S#@P56qcHh>z~RkN zv!sxjGUoJPPOBqClrju2@42|bGpu`^UFHkEY$d_y*` z)vQx1tFtGUGS|Sg$sW>Z8u~N+{0_usWG@O?mUiaL>f^{cZ>E!^jCy`!!U;C@1meQj zqKP(F(RJyQUFZq}-%8^dAYbs-_NDd>9T`Lts=x~~jmWtZlgcf+m`zf83mb3govDoQ zo4ZCNs7q5RPFE)vUS#l8&JkR-tBGDbMaDixb93=_N0zl)`vR@pXNkNURN~b0WSMN^ z_+$z@RJ~_HcXVGRZI~5QPwj`;6g{0HH}Q%f!Q_=f^(+*^0c8IF4g@KA>F%PtXxo0| zZI=?+*q%)Ey_MRx!o3LE~?a9wBQDN9e875)9@^aMY z4m#s-VbL4ppk8wWznX&FVG2e;%Z)|qxAcfv!Be;cc#Ec=M7S-2yr*2){#&748fq!* z8OHpT@VMr?GsJJ}E)-)-pGgI{>v`^3TGgqxN9s7*V-xVC5QvJMS?IHI{dcOComB_2`p@Cdrj0XxEH&0NDZs!lD* zEE9MhmQ@Yu8y9NQ@fOc7xQb3#^S--lf2qCiQ=h_y2WxX=nRLC?)wF(>_SlA31RZA* zXt3iW02JoPISC059wA4l{lyxei+vw1qF#4t*|Q#~!Eg|!&Z`{Whib;L8pL_tx<2a? zu6bIk{QUu7kt%0)4LZJXZ;SE(xFBPs1sW9iVJ=;kFL2h%_Cu!t($Lt}C0bCiGXs^p z)1Z<^tDb-$gd>e0pKtu2v_r?$1VUdOP%^ZoK{p;e{>FwgXI9k$bT`Avyy;=k}R!qXAFe>Pp7f` zX@mDa-(+^ep8kKhyf24B!i$TcXSPVoO;J8LIn6c;Vea(%?R?^2!;L{YGXDRj>%bo{ zw*6__Bb;Z#Kj-oy6x6vlktEz7k`5J;8}e0j>BWWbLn`;J`FGHz5sthD{hINc%tRp7YBPI}%v0%B|IK zSaIP>iUGVk&rw(ZH4Z0#@#RCtjejb>kGsUq$gwTBC}UJRYHeJt*ROv*$9o!|Ba3|r zZzCe=s){x^+P8TA}*`=gSCaFjSd;K+bmtS;sa zt>=i&rb(o4x~@BxEn{$i@?UoVcqHf3Z*A<69smMH#Xp^KP&3Vc-@4>0%tHl!EQ)!8I!EwL(7q_9=}6pa<^&_#mnBePAQ*={}@x-6X)f4bUX?jb_LE)7T)TTDEst^D310wPK{~i zHO)_7R3N1r&|U;UBV#T1he8-AxBk37>LP42z|>rkOhGj_nXdvfBEI%2N&GBj-&WVC;BCaIe&NIrbs!U8W(ru z*#y7-0U@5>a+@?jRHgowasrOQN;0S)YOzIGNq__UZ$>pD8%Gm>eGytTM z9cC=srC2DHU&~B6pbg6}R~kBY$@{wu$=@LT)bO5t^8ujVzb&7yI5l{&{XRwnIf<@z z|JJ8w4BsWo1RQ?w?9k^)y=t^_F$e+2Ru|DDvM& z-4JjKcR(FS1Jv(sveF8{&lkV{xF9)L?0e4O)S^DTogpCuSN?uOP#LU?7+!C#43?0A zEB{VIS!`S}Z5I?V-ee@yckE2{W+;9?i_d!)+5zBGNk?PU-ABVMnvv0CB10xb;-ZN8 zw5ls#t=w5=xA59f6G_|k$;u1+#__+_b7JJB4}gp!PRO3TpnRmbR=!<0a)W@CvKuT@pt4J{u#gV+GDnTqxKMRej zhfO8}N|D7a+|s9fep^3DWBD%J`kOR^D#tG*e--Orf8yc{&p{4=xma*bQB6FtxFUcm zp7qrQ}4lW&p3*!C(T&9HH>?*J1)eZ{SqkJDy*9QNWFbJ#ZS zZum&iZbi?j$+oXL@$Whu zR|3Xci3+m!QlM8vgLp!Nh{~va!!cHmCdYKr143k9d#u zoJRY{t9$KLR;IFuIcSXyDk_DX+jqn`D11>cSy@9olKTjqZuB!(;eNIiJ^-3D5@qlv z`y>0k=(w~|Ip;N;aA^6nxM6qKVPzTHxF@!GViT2ndyU)lTk@b8`VqLZnezfCrEBRQ zkTt>Zn%1VuK+r}@a$G!&ocY>B=4(M743GIwJzWUrj!yLc6~U*(q0!GB6;eG(*9%G4 zKR`dx2%v(rt*+8fRE~-qmB5G|EPHT2%+CaqfZ-gKa1SGcj!ycII$cKMf(O|FAnHHa zsyjZ(*JT8c2Y&6LzeQL50WmwJW*1RwVL`S%9HX^JY7|-z-QEse(OUWy5V)cRbJpDp z-J|+ipgs%|yLtdjPifUd-})Vu4&!sap7ZZLnL1Buqo}TrV%MhyugfV_>*x{PLFaqW z>M#cy%vcYxv7?IrL@K_`%#5ZxwAZeU-CAKsrsr{@KBnyi==YyM*l7S08sL;Z&`MUKl*}zy z!8oi0bN>{$SO3NP=lblCwJBxRr=W+DzlMVVhLR#Vad-b?EdVaBto+Q* zlsny>!CCAbtDkgbh6gmDbg$+U(-qmw`}#9`V+A~U-6Vx@gKTZ}QdCsHjfp7YvF7`2 z)D#qf2x9&vn+f+YGJKPqP0WG#7lX)AjnSzDJ;XZ+- z2As?#_sfJRgm3D%9hNL=yR&D*v|sNh0F6)cT_h>=$uW`@Ui)oIWf0S$%e*$;;Vmz_ z(3;x&?rh6X*t`v=LWv&)nmcTyv?ShVMYwAQc{ffs3Yp4!A(A;Tjaz4LC74)kfi&B_ zL}d0{O!We^4WOE#|FHyIvNHq~+= zzGzuAvAWrH0BH4uqsGB1Gwwvof;ghW@V*6fAGs?mBL(`NdJ$Gs343HDSxCCtO~1Pa z-AX-#&>(et>t3*ZQW&+*-?-vfc-p|Z#o^rn;0zkiaAj`4-VPcQg>AJiv~G&*$|D&4 z*ARSU9F;jiu=wk3e?3vyb{$Bt)PEDfztwTHm0hGepFo>GR*F$ZN$X^U82D{lYd-@c@q)V&SzVS(^{x!Okw1@p$ot<{!{%=)*5^}to z6z=!ip2OSU;onnW8#cG!^jB+|aCO~ObfNsk5{ej=9GhYO25Xf>B#u?w1ai7Q|4H=D z6gBNJjvxmv&}jJ+A-iSKn-@V;fgMB`kU@mu5G_dEc`|4GYM_;ZmP_aY_wD$rZ;OGTX@^_i84G~D%&Xs%_sz=;wgv&8i6bG z_dW2@GT`Pu8nbl~SP4JuiIM7epV*^ve1(5!>HugqQiU&nx1r}Ej0bF%3E2a&Ubyl< zG^9a)$8PDxVO{#ORFf`HE*FmQN16t?2DKNLqd`rKZ5`%T&rJ0-@(-X+K@YbUeSxp7 zeg?35u7JQgd;0v9>|FQm!3XcoAj^EFHlj)ohYW^Y)ztwZ!OL!kP~Z2VEa?lzRWzH$ zqzkO#kcrkkrzU<-?8b~vie_kZ?%o@ul{HQfMnI5lBi6gR<83v0-zwl+578@)OJ z#>y(!O<`xvJ##-e*19ktL`dTun%sNhWwFm3vrSo;#+O23IXqW}1kQ`=hDe--xN_Hp z=5bEbl#*TW#t0aW&9trd4-VFWu&Wf%qBVt8H#c2;uxMe+ zVtxz1v7W|Ffn;KdI_gP=m_aI&v4cEY_CRyW$2v1}3ychNR|@{m5hF0%>z(PsaW~G+ z7nSJ_(0Zp$Va*kCzPs!WYRkv&a6e@FA4HVu7nclpIfgI)s)28$0n!(trpjw1_bM0L zdrrQkwAH^v+LE81aV0y)y?x+5hx8S{#CrzNWzHVD^`0r@=KK(75oXi)EHuM9IA6+! z8Vdcz!C|+Ye*~1_pFJGD*k!&i%)f!fdV#YQn8g4@v{BNYzc~rUo}6ma=T+}(4%OI; z%-4!~Hlv35>=tm`E4CXCXmPNJ@VV67mB+1NWK0glVXlIZi{1^xx>Em~nuEt8Vsl0S zXHDuHR7Z;iChSG>sX?z=e3yA0HDr~L-1o)WTE^LXT`YC_yP^o8n8rYuXp1m`becZt zLbR#`F}i36C9`3PMsPyngO{}P?QG;7WY!}JJNV>}-8nPv8fNCr^*_@uD1C%Yi{D7j zQ|fA6_g>vn>`a*@o~vxH@?d{{!Asu!YRy-%xQzqP#Gb{~#uUEvDSmU^dtsxFwZH9V z$Ifcy6sFc(Ne=wH-5GAmKb@|%32KJR3cq#@y?DH)5yG7YVxfQg8X@e~DJc)~-Esa> z{z20D6Sy$Se=*tAty6+8$#ym)q z8w$ajS^**WDmW$Y&=B%>u2RsAp0|Ll@O(BQy|I(Et3Y=w^zfb{cZ4C2rePfT{yp0x%N)lOdNnu?#;Ron5&7}xA=^@ z9aP#~rTZgAL4c~>3_i8+_}ZLOYE-H_gwOb%8BZgR6rnu|Z^N1BMSmuTbMX}V-eFSr z>V$ns#qv6BG-QXZg~$s6bX^xJdgsrWdKar)bF;kjma#P9Y3|6*RjVCDBbh;ya@fF% z;tl!@FO}!5_*0}2g+0E-)3_fLc+))ug>YmE;8)U<=wb60XOmxB<>9~M!nw>QCWCf* zH4t~vK!rWZ`@Vtx*kv}w=z3*$Kj>C<3-m(C|GpOr=%IpqY|hU=;a2k9N4`D0yiK}q z1)a*Lg)4rynGoo1Lt+K(VS_2q!k14pytnmgMLx&xPA*4Tr`CJF{b1O&J=E0Rfyc6c=YgBYYS{qYU{EPj^ec{ zc!!+hEvFIOzWvDk20;o=K@9$OMBA@&VZjBoT?Z62WbumI7RlW9iYZ*%)Qcq(Qh)^@ zV7(64C}nnuw#Ca4`H5S|VB0f~gpm|d8+)^uiT02ti$S94Cpzsu+Y*HQ=uh!Z)yAr%0c!Qy z$pH?}8w``?=g$*nA?0hzLFk_;M3Vbya)mJnPw5t{)CxOnJK92Dmkzn56@qd3p3~ZJ z@Obk{N2+Uce%q6H`}_QRl4Kx;I0(uzPaKFLCV{T$@iujPD_$^IH0;_TzWDdAJ26^K z57$H)=y1mYP}3>H0@K;@<*#7gt=?~gH6J?V{{6}QJsM7U7M);)Y#vO90Az&)$cm2t z1y=kHo6wIzr!>=!ZGEg-XWO~Uu;l|lFY$zY9OABp>d;oI%EcKFfid{6twex>)>51K z`^NVN?0b7j?yYNW7-{s7t~Su`?tl(dhnBbF*~91WTS0dmKayHV%m4A(AkkeDlYq;- zm4hj^a2y)m+}=2%&AYFq+$!Tm zTg+3oZkOU()FF#HPTn~qG0K(=t#+p5es!;`wU$S38)&(uenx4icXvU+F-Hb|T(^~s z!|a8?%*#^XLaf5pU&^$KQ3dbnJf=RE| zGGIubjL5Gf4}td=XGfg%7MZjH4|17hgc$sVtgBYg-EbM+eih!UHOzRZ^6cjx$E;vhWfcH5N3tLBTI3+6ei*y;ovfkvnuK$J7}DN^ss~YN6lP~ zl4^$)#4KEm|CV3H_^e%PB1DX0d#G_oSb?yRdJvSWfsW5{9oAbn<_~u-eo$y#hH{)| zeZkV1GmG5kUyGkYQeRTQo8 zc)>cU`y7V<{&!XyQH4R79*U3-6YP(|Kp0*x*fJ zAWu$Meg6kp@(*2qreK~w#DYkF+BuhWk@SSt4Cm`(fcR~7^6;%0f6tmUFq;YJvtt4J z?ASl;f_^-qKEo$UII(wv!ypcQzW)pAs0Z@CxbRrG=H%XfvT*=BIJuRKgZDf5_9hB^ zLCKqC*8Kst)7nPQ=+;qJF85!)YPA`jB*4AVY%sH7_G;#XN`LivUz{`icJ}N1w#8Gq z*3hj;JKs4Kv#`E?wgTs;pf*&_Jo5#C^E{~yoxXUy4Is?qpMI{j2?n|>v(n;vSGDen#Gz>)oLq< z@u51lze%e3axEj}s!L~MspV!QW&TX+!uHiF{;#6@mJG%fS5A z6=AzM&osw@%I*zH(BCCC0P_Fy zJL=C#&98DLOj;1UD|;^4!n$l(+a<`+Sn-SvpgRd zEwY8eu$eHu=>V;QqwYwqj1OIe|1$*Z*dO_Lc9JZ--^xiXza4pwiBDlI=Jcmu9Qu+{ z5g~S0oj1w%Yf~i4UV`3n#ZO0~_asl+;7`1n4=y3~&kNTr8$*bb$;M{Evih5?L7ex* zYN$MqtERxWUNNJ(uU*AD)n>hP8PbMe6kW>MqU3u{#3+A1QpX0fFq|7&aH^M%CvlZ+ z$F$aBUeO@tOy7wJ%@@bFV1VNL^calm`!<*10j`K8t{@s5d z>Gl=)3{=T*uEyVeWW;MA&q0a(IkeO0#JAsXoGISDsFCNVUR1EuHCj|utdbEQ=4eiY z<8w~*`3hS|Rjn|JrQMuC%sFzIf=aKdjg5kmbFrXOVx$)Cv!X_0s&dLtBjQAzAO(I)~l}MXw`XVS$!uV`o>PnM0#yhl&&=PAdQkSlIvu!aGS-sq=|q zvNE|>aEOc?g%?11%9Km^3a)NS-yQ&sHyxlFm94T)aMQ4?QzCyU{r-;GQu6wVWPu4x^m$2}x}d{{Bn;>1q%S!e z+oK0VVf*Y`G8@lfpDQQ4on$2fSSPlK8J1kqH#kfi=KkS|~*RqY;ABQ|8V>e{D^O;6__E;H=ieB|++eW&Z>nx!0+c;i|I#$JwlTe*7 zU;jwX2A9n7`+2tD60-evayDWbY|!KWV#p>+`&;%-Ge<#4%kXA+OzLoN$*aOuJNL!+ zh5Rd{2EE7*-0S>;YSednqP%uK>CDpfaaGQx@J@T)#Y5>3aa)&YnL!Rn<2<|Q_GnPH0il0rg7%_yas87LM)O))u>W1FTjBSBHcJ_*L+;s<8m=Qt)Wb9LLCgH%6*ig6gq@07p&a+o(CAIBWH3@!Y zKJ*U!hPpy^i1Qs??0D(?l6mth=Z+V@L-_BT3;#ds@+W*`k9y^#4wihF+Vg$c|2Vn)O{OTA>KW4NN*Gc+(z}T*XWE=S{@B~fPkb`2 zq7;HDQxdJ&>6QrY;CizAZ%wJ!p4T~lkILEU&G~o^V#hS^Ik=SLnYZLRS!tQyD*9R2 znD0b~c-#nhQ(@xU5+1@zlU>1tM@~>{j6L^`E4hRAwo*yXbM&Aq)!JAkUdCtLM=>04 zCR17n1_k$rIkG*;7hOxG&M2RiFvfXl$Q}b(uIX1JPaaQ{I8s`|+dR0_0^5{#o_eh9 z^GT624QNDN?8dJT@axFed$FEj4Z1Tms91+H9{_aS?`xGv=lQ_*w28JqD9aNzjVq5& zW67t#prS0bp?qs-DC%Nguc^uI2C0u6tg+i>I9#=O_Y@eu5=x`E{lbC!FgvFzBqTDS zYSM5c1C8JQ$qki?Bm!33qt50x#D9oB1@19V)n9*THc^=&n$FVCOE(~R&(vRJfNEl2 zN^Yww^;-k`OYl+(MY;!=qWT}lv48zHfBs{`Te@`l+DmOG;>4yPid-C|i@CwF(?;%h zOO+^H1DPD8PwgJPp1_!B>+zkkz_D7?=vJ)v0*)#&>huloRG!lSqNvE@EK|#T&%>g7 zFWKMT-Fpr5p?kAyYeHXW04bYqvFLMQO-?W8u5mC;UAgKVR4SbMIm`HlPMW?k(7=?0 ziotY_`T&5N1pZkQ&_8Q_GiKN9FtyQO{#+0$qZV}K_4WsXKq1tt1h=k}jp#>3*rru} zYbheS9mbYdaY2J?Qz~j_W@s|rky z_}$}F(e@Zqe8B_YNj9M%X}eQ9jK{PJ9?P?7)zAZis-PmW{jSdjNdCU9N{@ym+B5Sc zyu$H&HPp+?K4h={|i4u~El@`i$GJO#dLIx?Lsj*`+@{mvg65acdt zh+X2~y>%s9k)-LYv~n5WL;1Ga1{=|!R=W_9-WSrP4Ue<=x@7{{A>{d{J^a>vPxr2M zcL>D~iW%dkXA@>IVUbK5^b+9>yJkOLndU`^PqwCn_2+cu4GP|fl3i8{(G3|A^-w&U z?_xvvd}P#Juue5OKP->9QT$=uZZS}*d$|Yt9rH~xu3SUQo&=jSv}3I z%(r#bA$?6b11w512>~}fF%p;-W+KdsDWmH8GBmX+qH@dF@RSxhPGPCK4Q?V(gz8Fj zEI0vE#VLD)saji#L*0(6_n?q_8@LiGA<7i$^fxq#tGR_AjK@rLze+L7YSp$dOmqr< z(UL;ZG00*mh8I%$j0OvBV)+f(IpK4RvUB(7nVLu_bBja8m9Lc8bdh7-`hx|H+%p8@Y5s(rLh{{Lq>+;(etO&1=v6u=* zuA&T4Yt-wXf_y9`I-GY>x6(E1^^Gq^Ck_l}m%8h(kX-G}d7o)D#kC^9QR}oUpGCW9 zTBepXcrDZk!PBYLf_AJ7h5u@98s3Ws^+DcGh%uW4C$xG-FnT&43MI`bT_J3p zuPjAjukFI~?qov$Z^pc?BGSj_i? zwJy`(%_C12l=vd7>Z0)fG@ zkqLcM4xS#YG_?F^EY+D6sQyPy^*ZF-CRXs}9a!5d7l1^)`~rQfoTfWMz9iHsYbVOR7! z|5DE_o-r@u{6p`%B`Yy&?_i6x_{44zNk<*h@MFZ2~}|*H5W0R$zBoJAHhV4#>%wvqpEQ; z-bdPAP4o%Qxf!^|klv8PS6EAP<#lF8Q zrk+B<#$e?yaxW;H(cu4;_uWxZZCRg1&LAiP3W9>5B2Lf0$>6VPN zJ2NK#Sc@}fJDP^u(YD5`0vHg?@nTK-oBg>ZRnLxC|K1fycyQn5@}-DPNH(%q!T;$S9E;iM%ALDj(5Akf(Mt9bQ-QZT<-9Qs zoz&c=r7~TerT6`x3ZUcC1@L_5Jd0}8E|`zbq5RYZ;?0XXZ*wl~p22D0+Lf8-_zQeU z^0{`_LQA&34Mp6=ScJ2&r*Li{!-pCM8|8_S^iUPn9(jz1^?rrHc^juE7n#XK+tH=d zeYHy#GEZd({pk0}O(N(H@gGygU>gKPs5wH+M_tZfWAl1hB{8rzKej;~ZjUsXkb(oQ zE0D1c{6qE?XyH*A?Ci9>q3YQVD|C@wtUBnbbo(8Vkc0X zK+^q$F;(n1n%Y}}j6ZmYMNI3R8_4c_*r#}4Oxt@BcHXXhzWW8zOXk^EkTL61v1)yc z$8mdyid`oXLb>$PY+EUvc@OQpx|7fGhxIFmES36NYm>O1dP$LZGJ0GB+PFmj5OO2O zT2PdupTv&qEv-9rqj#0Z3ZQmHyK^*Ze6S;b>LKZGjIOd>xi@1)p)`)+>))I+OR<_awNGo}jL}hCn5e{r=KvL`9XS+^=mUs5= z6Jx{tbJaa5!EW}K0PQXOoxt@=`1&tcegNgGZbxx?xQK5LUPB?jC^bAbp)dGuI=)S- zt8e%q>hfJbRj=1WbtvsKAH_@Qqb-^G>C$aREN*BUS1nbW;CBL9*p}-Ri0F`35oS1U z7}~`A>j}4Xx~@;s94$M)qB(L7@4j<5jwnfi-r&*Q zlVP*yOBNZ%eTBa9*!JE;=b@)KJ>q2r8TGIm@Qt%O;>6Xiaq2^bMhPMX;&QEp7auG0 z7|&YJi|8aY7_h&K*~!-eFdN2jyTJ?0&N2FYos;dHO2f^G7ZHHy8tZq%esFdr2oUWf9#l7 zSC6qhOooh@=!MvB?C9{}rO72Q8WE2|b8q5vgg2Xk_&u*U;GfGHQmQSVnMG! zd-weKv-1s0&H_;ZfG5criR=X832=zkgCTHIi2kg)bqrv#FKOR_*IDeZKz>I)5v{aG z{Utj50@?kN zyZRCD5nxBhyqZl-E+Cf|wO|F788pR`@^XUmcrm7aoB|0mOr=2r=naPB=cV|EN@cMs zcm|zp`3dcD)^-)7rRkk_L}-P@3i+}19ZbcX+ZnulobR|r=IcyNg?Jo&cE%LfFoy08 z<}|UiV|>8F2o0`XK8ry4=7V>qAp)>^c-==h15{PJK^EY_?Xg$+FIM^vvFfCc>t01L23YK7_>hph`IpJBt7{`$93~x|1yPdueGErRM3Ik;otwQc;VO#~f5>A#9Y45TONa!lgA3 znx1`{%{O|6ge~skmdEAKNH_F%Y8R}Il8M9OZs$Rvk7DTbv=<;IxU99Zb4>`(uZ+mi zk-LhT&iAOwYQ}iC>hBP#&JD=g((BGPi51T=n9zr-aZxbW<&sS>7zM~`{YXT&TBA6n zP!*X{amB!T{V6dt-uPjYiG^x79)t<>0r$J*nWa>aUJ0DU8GX7#mFaz|yi|OrLD!7Y zz2q=^Sxab!Zbq3e%3Als>CoK?W8 zSu5s`r+`Qa@NP`jPxG;PFB02(ahIQF2K8OU@9y)!+S}kjT-&%nBs@EB(8B})Yj1y7 zl#FaMN4>P6R@+pI_8|^Wjol=G9Urz+T<+i;=F5xThk_BWZ0>|M)UM%NXn^bLED#KGV zj0=eWf-NW4$!8#`P*Yz0!$BmlgmBAcM}6k^d1T#kw4aq9gNr!PY)wrpJl!wum#SqolnK-%{||;I^Su!|ox8 zSxx0BFp$k`tnmHQBAag)ufIGYXQxQ4VxQ{7_+qL3dI23WOKx8V%8O%|im7iq~hmxm;WIZo6 ztak@pbb#C9U7?ssk zPImu0mp5D4PtN#l9Mdh2xi21CZ@+{<0}sW&(n%$* zY}pIzKPupYq9Z3vX{E^Nqz!9Qe6T=(?5UTQTnb z$x&A>lQ+e&!@K!rCotVc%W>957!otvYJ{2K?cy#$EP)vs?wPy2SWo1&`5Z+B9!j;R zS|=EPZrCgU9?IIfr8RBn#b%sWpMqL_fodJ9+sc8LvLdFxPdZ(JEDeT>(!RWEnHcq` z1EuCRoHRP9sk(>2 zfm~WnvA=?gY-0~Qoy!>1x+7;N+o2Qvg8jT~z|!8b4V-A(&w9T)WZ-hC9hu;vD0hG^ zp&rxNwAGZgkt9-HT_RgwnaD&f`1V0}vMG|qPBgF0YS21xSPwE@j&uG67YQMXX!`a@WOsw^9`s<=eirZBW~1-_wNGI0uXz% z_h|!u*mVd9lfAURyLP#yYLhrVdj-Pe_`jp20=|O9Z-;d52W+H6&Lx{6^4rl9nv7vf zTYHV3)9!2 z&{OcCkYQ;<+~xQ=wa{6hhnE!@Nj|Vb-d*oBr_aTBT@CX9A$-!dIyEoB*ASpAZYT-3 z9(KTl|H?V{UV;n3Fb#^tPRsWn6`lkOZF&M!E*Ts5TUn+t61^=zD0FL6M?M1D5dsl& zbum138RwfDlr77fh5fkSDO(CGAovezij$pmuQUH~5xeK zR_HOXgD=`Lwc2Lz$BTD*=JpH3bFzn!ulq=>)+^`DYwOu)hC#h=#4>JXh5J0&Ru_(M z(CQQcODeeR1=}Vij_0h1QwB*LvJ+R@^R=O$D^i-^=&xB{XlW6xy*9c6X{FPi(L~y> z61mj82s2HS+TYm2ZTV8o5HS81utwu2yh#0XQ0v7*_(g~g zfG+1W5V6c`c=oKUf5!kA4-rk|t#ccA`5CNmI`w7_%N8s4ZUz2MuP#>|4VfPs@JDlJJ z?qP5>FerZ8z#i~h-Fd#(L@KZVFkzt_7zdyN1U3Hd?*Mjfojg+uyzf?Uam{1qgAg}X zGZ63y1O^7`CL7OX9PEsE1lDYx33CFtJ=#>|`ZZ(uk2dho@C-Po6}%6%f(DY`z+AJY zhwJa3Une2P>&@B7oJTzZP);lg{J&ZUs$P103)vQ$h0`D%hsBS5tE~oFn-RzCuH&u7 zS#}C;!xs~)02_OKG|T~}3B0)8@&GzA=wDn17q}d0$(H5;XYBwXWC6$d=~-z!Ffu(! z2^#aE9pD1mz4#6?M#w_gK-t%Jf!N(qpc3O9Go!9udwlFDJECl6#2K(UciU`@i0`bJ7CdNtj*G;#a ztWcxl5%Nc**JrTnE|Q$jU-KHV8k963iinPXAD`+Okf?OO4JB6AY?h%1Z(ANA09{s4 zk?U|;8+KY&tKWx!{rAzd$tusTK&)Es($4whdBSzO>Z#gWgBsPhlgHY**0z+!Uks@u zM{&**c=7}c$+fdCi|?Z>YP?v|HurZ>rzlJC*uxzDRJKs}InTNG;!WyBAv||z9 zl;>gJm#;uI^oG~d<)fRNjKTymPKuYR1IJ3Vh;!=p6Q|1geP}1Z>Rfce-(L35?`x*` z#%+Q#fDt{mF@WEu)y4@B0YVG(a*xk^ws{f?pgD56X@L1NX?s z27btMyz*3;^ps9?x&?m1{BSC zLo6I^F9Gd+q)R_Rk(|qJYfixwa}2(sRZ;bgee$G3m^Z+>TNHDotFmwA~H zDdFXhay5Qb$i-=<%{}YaadEvYG7*2n3~3mS+nLpkNDt9m&y#0+=Vb4c9yjGuQs!i! zCoIU^Wu}HJRx{=n^403Z-PvGc@V?QsYU`Wkurxa7UA>PQU0)bR(hCPwa|FM)v3KqO z&I3AVfZZEQOjS~M!H<4bG(yVeXj?q+agT=XFI9OamKZCkZ718QCwS+N*}BrN zYxwPhO%YtQ^AZZhik`)Z^~sOrYC9#a)kcKSu{1R?98u*>(e%V6kbgPsVT_XV_e9_B z+>P`P+~nkRZp`5$Vjox6{yxa=QHexDVdb*24wJF^vrq(nC=_Z0E2H7dm z{>P%xJ6;2R3IB}&5*>I(n0!;k+je2PHc@y5x)Q(`?fFiK4&8yOa9)Am*EERRaZP_ox# zLhBy6y$iius{uL>5@cUL!rC#1F6cS!cKaS;F53Z#b_qxRiOKvAX7qf5$F6rn40P|- zLrkKk);Xrq{BOx5>Dx_9nh_TSOqa~_!L_rRg(dZ78aF)lXSvIoj`60w(Q$=1n$BhE z2k<4d_YmXmcUaZt<%Vi`;#XYphh(eHUp9i!Sjv8&9g$}|*O~loY`@$%{LSo3zevyg z>2MTGHz~&t2r2XXXQ4;e_gI(=lTOA&gkJ7wQliEQor*qEr4v0U6~%HG87&!dm^GsNMQ@JtiP8LB-iF{m{C*tuo{*i+ zTwWnDbW`WaPdQ98Z!;h3qfS$uuO45DwbatnVvI<>Qqo<9TVX`|7;2)vm%L@Fs;;eW zNWw{&yifKsEPX}lHJzY@P(RhCZR+68*r{A)B13{w=Udx1kZ1?N`vHDm{V}N>Hz4$T zK&V&pWF&h9-m@;Hqm{uvr>@IWc6aI@q06gf+fd_gWx-QpT-02Tq@GOR11?ejeZm() zYr7)4eYNioXJDpLeL9--l+WMX;i~@3kX$zxNVya`=Kh+`cKhif1YT9^yv*Ql_e1WK zO!>uwU;!DAwx5;HmPCK{rcCJDq^ZX1^EIlaA=wQ+D|0uz*sc~zw?`W(4T! zYYbNwA@d?G@9WOl3bADC?H#O4Rw}QEvb9=zT3YM0@E$W<5KXS>xDYTFu^zu$P+a-= z&Wx=32IRb1@C5AUR>zE~T z7S1_&Zsu5`C}mu_dt7SPr!}D$F-g&4y&bWeW}`Hf#gEYp+C(q#K>98%CU1(4k)FsV zz>+bpUY%OEBPRJgx|OV;EyLQ~Y6tIC~{Py$VUN{~jPb4Q5);@~AhL-QO8fdO`*QseR@OYJfF3?={jM74H5 ziwsHm8~bAKsvP0`6wLtNDqgLyw6kAMHT*b{$J+Z}u}SrHz1A z=7PO=yVb$hs%RrOtr>p1X+JMQBe(Aa2C$;7Dlu@;)-rVS9>Z{1ZAQT9cJNNqjdo+v z;GZ&2-)hh$8`(acT43Lf@`#m|r9#(E;HQNc;Uc6>&a#kpL4d7Z8-7HQUtnS&?V2}B zHg@2#0%vixTA1d_O|yIM&3-mak4o?!Qc;2xz>7Xy0Ecx;Ylth&Q#S7Jg_c^bgtgFTF@mHzKK=yxf!^!zEZ-h$yl2#uyZN5ikx{JWpI99c3+&* zK}d*P0M(#`@<;j_uDWVyC0d2K<~Lc=tY&0mqFQwNV_f!2N8UvHtlg=CSrl>1b zdlzi0Id+-iAZ2c~H^;lxx!9>Io+&sveIzIU0~bkN%*LB9G%7$bMk*1c%R`#cz#qFXA)PX)Bx z1q2?smz@$Xp}hk=7LmosrV{JSznfg%Y3uA&Q~34PRuO?*mwv^_`%GLd<8u{4$0};a zaO_80_FFFS=fH?sT!AD9tO{?cCO;qAu9LUU)Ss1$?X~H!sPeqW@X9k?#LkF6z`1Wu z?pC)Cq@c`4*Q5Ndcb-HE8hLERsXYM?)WSN^UFNkZQ(_7iB`?~%>I=tvS)PFI{sexa zcd8NWZEDX-vFmn&=MgkoIG}}YzoSn!yMXAqeAe7%!pl3ulM*j~ROuA$8roFHjK=j? z%=kC^A0!F;CxDAag!Q+#xcgg$&5?`KddjP^npPoA4_LdZI>SCaty*?U-*bNMp?h+- zmrxPErY)-D^VIeZ1W4)2P}H6C(<46&aBme=TCB259myZN3+85Ffr*K3y~l0oEEU2B z(Gh1w58rYvcBvkkwmA{-N}Nl!cPMhVtn3fLyfwouv<@R&EY!pbdP|(_YVH>IxzL2i zr|`*b@eu}*a2bylqwp#Wnk@p^*i{;Ei*EG%T)(4_8+7fael7i1c~1VDWj7$67@|(f zN(8CIYo5{$6c^{^!>)={u8(i3-D%EuI4@Yum$B+-+`N&~=DRDn8w~BAS}w1yH_&<> z>^lA?-JD&|Mrs(7r5KU*R&a(`upQ$T8*~jY^&pynNrRu$G-PGVW>}ZL=!+74?$^KK zC#Qcf6WK_eY-BSaL`B{nsw=zLD{dR|J|DUrGos%!%#Hn~lBiFfex(m@$M);Mv#gc` zYrv+d3pJUKoHyzvDC#~2GG}ccA_*72)tRy z%;9druw0?VTXb9?q8NGed2KjO^iJMA6*vt}y}qoDsa9BE8VK@nnRJt-4y5NGGeyM(fV<+iZKHFG7mlQKRiwPv4m4(sx)*Iok@i^!cipJ18X0dK3N(LXNiLs zC&6ynx?k9=sSbygodBs(N!SA0fz3}pqL9R9B_?`aGE=}7k~5)WsJF?#>MbPN(NgDK z5qZc$ZruMBQ%x~j?@6}aq$1;VNQT=R{^%-30q0OcGuI-~;xQw8f(^KcG||fFa@U~^ zhcU&)Djn}Tg4^d+PnwpK0tzMBvvN1Y@0FtlYu_$z_HFAzKE;G&qY>Q0qYF>BEGl41 zxtKV@XX$4eJBDgWHH_w`Cy?3_@<+*pcac5qveWZ((4e+!VtMWdc%E3Jp3Ct6NfiKcEtMJMUe9ica!&Lg++ zFaiW|WrS$3T)cD=x^)LBw&MULoYlsGyaPM?9f`hFZpz1khIKQKFp@s9!HNtIaFD*V ztFC2#Sz_9!zO({bYRY_JYBX!&`dW7(K7QY&Rv79>o#kY>eGf~%`ef@`SD;*vHV8c% zS-p}aM1~7E*meuXyoF+|KBqnIbwoYbGhgW~gPOuzeHhGEMXFC7U^O1-83V8Gy4q$f z9;(-u#Y{T}5BcPZ*E!j~*J{1AQ?blC9ycvt*qdulp7I{wzOZxSGvL@xA7Lz!2M-yA znMLA9zIo~A97$FFCpC##z~*7V*J^s-ejwi_6JH$`A%crAf)XChHw@Y9#h5DBdym^__qp&9GfSX=^8GX5n^jY%q1-}Uo2Dbl_;Ucx6A2VVt z?W^y$w;z7l?&EfQ+}K~h=Q@!-qB(R|FNX`gpEU!W6@R*7msk?rEeSnJWU;zecf)wU zAaAp#$BQs5Be9=)pWNb5w(E{r)A41>{xdxSkzV&!chXt?o`|gG4OXooEQd}eFbhZ) zgYt)4o3((|p%*Hxe8pa6VEt&9bZQfOQ1@<#R)b8`pe3ZIBZX|q`XwRfLa|ToHtH)h zQ>T3NC`p64E2AQ|$pT*xLrkua^u?v(-f7UVRMbjVoI;ZM`%F)Pj{8sRxWq`L*G2G4 z#g1S5lGS`}xmcN9{>m0wX#c|GbqBUQ!%q{Q7)&4LsnU|ivrIEP>qTPYMZ0mWx?j{c zHEW#PR{*C417Af{1@ioGkmY#gHQi1P0t^2 zsQ-|{^Irhc`M1jFWpC`;xUoS+gddxmdvp+n=2K}~iUy$NY5Yww@|%L>pM7H)sHj9h zhAqA&F3V-28Ftf=Rj+vB1PF?^j;3&O{?o=XfaKR%?xM(MZ=zs9$P3tB3Q+<})Cj`) zmH>W&zO6%ix7@k@B^uK9DG8<3fnHj#^Kva=nK|XQ_$Q16&}?ds^+kPnw!XH?YW#lf z3O&_DgiYs&J>R;hw52iBJ2PLP!S;Ssn~YU6$U!dNj!Dcx{3F`npnk*q#n*>+1+F3A zP9JB>zYLGi+Zs<(O)R_&M zjh3^iKdUa$9z|^NnjcJ`eF*50UpD>P*)|1Qf0W;}NFz8|-*@>&5&RWY@gL(1ZXL@^+eRlZQWFA#kw)eLr|-}CQ$VTM zocarZo%8Vqe1H3l3FTMrRNY=?NVv(blz9#}sjuQxvNB`M{h6Nsz6z1W5`fl3^uL0cO8xTyW-Z>51vp}#yO)mntKsi z!m9wBuHQfYH*5P>iW7B`;r7vTqXMT=iApPv= z&dG|cmD1*Y84ODLpAX8P)b(d$_{ZXYGm&(m)7{nnM%?w-Jo7W^ziZ^bG9!Pi?f?4; z%ka=e0I~x zEfUqofAU@B>!v%!g{Zo|Udbn&#h7R-O&6z72_`><=m;;+=>HC&MhqvZ03LhxRV=@* zBg%f5qw;SEepdgxAs?+Xm)~Pr%o+3$pvA0nC7~99{NDc#(q#IS7|x37dH+Ab7ER3c zuvZx^ylorf8Yx2{GchAZWYq*EDzA~mG^P)8%MEY-1zGv)%I_ft4GZ_FQUVYSXnw(s zTX5T$j$ucn5rf(r((AO1#u8BjJPOqck?U8tLgRu?rqRLJ*mc!*8BPtoUt z!j72*u7FueqllMtgnAMX*xJmN)1AIqp&9}epWB@74j2-tW`7_raI?w|axngZKfC>v zqk;Y3xalwaLKR0gT#Zelj4v7a6w|~M-ikNuW)|dW{o3oD1(L`|_*lB@pDYy*?<78W zOkNkptis@gN1kC4l3YWv6NM(4gyot9oG$(m!_!s$5L@eja8vE0fO@Q-{}$CH`meb0zHLKFB~-9P{Wu3I%-UmzAv0Zmth}OMh11romQsdZzC7Ns=)Ho9C_Dl&=LV kF;64N^h<)~p3>W; #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 20; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index d27a92d..71e6504 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -13,22 +13,34 @@ namespace StreamCompaction { } // TODO: __global__ - __global__ void KernShiftRight(int* idata, int* odata, int n) + __global__ void KernShiftToRight(int n,int* odata,int* idata) { - int index = (blockIdx.x * blockDim.x) + threadIdx.x; - if (index > n - 1) + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) { return; } if (index == 0) { odata[index] = 0; - return; } odata[index] = idata[index - 1]; - } + __global__ void KernRightShiftAddZeros(int* odata, int* middle_buffer, int n, int difference) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= n ) + { + return; + } + if (index > (n - 1) - difference) + { + odata[index] = 0; + return; + } + odata[index] = middle_buffer[index]; + } __global__ void KernNaiveScan(int n,int d,int* odata,const int* idata) @@ -44,7 +56,7 @@ namespace StreamCompaction { int d_offset = 1 << (d - 1); int beginIndex = index - d_offset; - int prevData = beginIndex < 0 ? 0 : idata[beginIndex]; + int prevData = beginIndex >= 0 ? idata[beginIndex] : 0; odata[index] = idata[index] + prevData; } @@ -55,34 +67,53 @@ namespace StreamCompaction { void scan(int n, int *odata, const int *idata) { int blockSize = 256; - dim3 BlocksPergrid(n + blockSize - 1 / blockSize); + //This need to be parallel int* dev_idata; int* dev_odata; + int* dev_middleBuffer; + //allocate memory cudaMalloc((void**)&dev_idata, sizeof(int) * n); checkCUDAError("cudaMalloc dev_idata failed!"); cudaMalloc((void**)&dev_odata, sizeof(int) * n); checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_middleBuffer, sizeof(int) * n); + checkCUDAError("cudaMalloc dev_middleBuffer failed!"); + + cudaDeviceSynchronize(); + //Copy memory from CPU to gpu cudaMemcpy(dev_idata,idata,(n)*sizeof(int),cudaMemcpyHostToDevice); cudaMemcpy(dev_odata, idata, (n) * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(dev_middleBuffer, idata, (n) * sizeof(int), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); //From host to devicw int log2n = ilog2ceil(n); + int finalMemorySize = 1 << log2n; + int difference = finalMemorySize - n; + + + dim3 BlocksPergrid(finalMemorySize + blockSize - 1 / blockSize); timer().startGpuTimer(); // TODO - for (int d = 1; d <= log2n; d++) + KernRightShiftAddZeros<<>>(dev_idata,dev_middleBuffer,finalMemorySize,difference); + for (int d = 1; d <= ilog2ceil(finalMemorySize); d++) { - KernNaiveScan << > > (n,d,dev_odata,dev_idata); + KernNaiveScan << > > (finalMemorySize,d,dev_odata,dev_idata); cudaDeviceSynchronize(); //ping pong buffers int *dev_temp = dev_idata; dev_idata = dev_odata; dev_odata = dev_temp; } + KernShiftToRight << > > (finalMemorySize,dev_odata,dev_idata); + cudaDeviceSynchronize(); + + timer().endGpuTimer(); //Exclusive scan, so need right shift. @@ -91,6 +122,7 @@ namespace StreamCompaction { checkCUDAError("cudaMemcpy odata failed!"); cudaFree(dev_idata); cudaFree(dev_odata); + cudaFree(dev_middleBuffer); } } } From 64716bbc7248f60a0d7ee98a556e7f045d411171 Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sun, 18 Sep 2022 22:18:43 -0400 Subject: [PATCH 5/7] update readme --- README.md | 19 +++++++++++++------ img/Analysis 2.JPG | Bin 0 -> 60551 bytes 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 img/Analysis 2.JPG diff --git a/README.md b/README.md index a86d96c..f010b0c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ CUDA Stream Compaction * [personal website](https://hanlinsun.github.io/) * Tested on: Windows 10, i7-8750H @ 3.2GHz 32GB, NVIDIA Quadro P3200 -### Stream Compaction +# Stream Compaction This Project involves: * CPU version of scan @@ -22,8 +22,9 @@ These three CPU implements was used to test whether GPU implements was right. I This program generates a new array of random values with each execution, where the size of array is customisable. I have varied the size of the arrays by powers of two, starting from 2^8^ all the wai to 2^20^. The program also executes each algorithm for arrays of size "non- power of two" which are generated truncating the "power of two" arrays. -### Output Results +# Output Results +``` **************** ** SCAN TESTS ** **************** @@ -84,14 +85,20 @@ This program generates a new array of random values with each execution, where t elapsed time: 0.274432ms (CUDA Measured) [ 1 2 2 3 2 2 3 1 1 3 2 1 1 ... 3 1 ] passed +``` -### Performance Analysis +# Performance Analysis -* Scanning Algorithm +## Scanning Algorithm ![Scan Chart](img/Analysis%201.JPG) + In this chart, the lower the attribute is, the better its performance. -In this chart we can see that when the array number is generally small, the performance gap between naive method and work-efficient method is generally small, but with the array length increased, this gap become larger and larger. +We can see that when the array number is generally small, the performance gap between naive method and work-efficient method is generally small, but with the array length increased, this gap become larger and larger. The reason why the second method is more efficient is that even though we have limited the number of active threads, when we are doing naive scan, threads which are not doing anything have to wait for the other active threads in the warp to finish to become available again. But in upsweep and downsweep method, upsweep only use half number of threads to finish the work, and the rest of threads can be utilized by the GPU to do other tasks(like downsweep). So through that method we launch the same number of threads, but use less depth than the naive method. -That's why it is more faster. \ No newline at end of file +That's why it is way more faster. + +## String Compaction + +![Compact Chart](img/Analysis%202.JPG) \ No newline at end of file diff --git a/img/Analysis 2.JPG b/img/Analysis 2.JPG new file mode 100644 index 0000000000000000000000000000000000000000..cb3805673f5a1fbc9f967e4fe8b56a5c8d521b89 GIT binary patch literal 60551 zcmeEv1zZ)|*7(q+AWEkaf~0hFK&7R-MH-}Agd->hDIjpAr5i!I4uXJ)fV7el(%p3q z|H1bJyzk!k^#8tlzjxm!{zle0%<|002N6HKn5Xn@Pq6_O8^ywkw91}Htz5@a0&p5AzWE`SX(kKPm8&0zWD6lL9{}@PCm4ygYmyd;oy<1m%k!K>+Ft@lQ~X;KH!b zFEGmG%K#7-b_^fu6qsR$bo;4O{9ojrpE&=dz)uSNq`*%K{G`A!1voEratdA+5ai%L z4+0!~f;{}dZ+!;r{rdpdOr4A!E$y8x?QG8< z4rp}K&c#`T#>K_bM9|F0jOV6_2@mT{BQ6tG4h|C|RwENG4ptK`E)zZ;4t`EEj+-=p zzrBg^(cSG`9B+NOw}~;k=`B+mQ(I>zkQyB9H0*zS=>MsEf+RY+>2H!B#1TA4@T#M! zk+bPF@DM$mQr61?tQ`DmU#D4^{a5D*vmX-T#6$kiKJ|A|N>tdySkUr}0S*oU69owc(+~d~)B^aJ$dA$RP>^T@kk(9lr9@FbEK_;&ymAMNY~4srBz%0?J8 z4g{P&q0cdCuN1r@ROwizsiZX66=_R!+_?u5Rugx9{Eed+_klWB;)5rxB4+zdVahN=`{lOV7y6DtuW~ zTvA$AUQt*7x}mY@P4nB%uI`@Rz7HS!N5{q|K2J_f&&;l@uB~ruZf)=E9^!=rp!^N3 zBg}q<7e0s=GAb$xD#jsRNXTyBfr5{Uc7X%^thh3Uk;6F}P9IEyE1}N|USZL4sVo!T zbnL(;qT?Q=UpYkW3ufPknD74=W=Dt}n>74H9Web z#nI!bbco>};gfvyaB>pB)lKPABKYLkWyR=0M9n(v=;A+w8Vnz426eY$9=#H^cG}(R z?*7rI{b6E2OadJ2hHYt4cXHe3mgJ6T`E&AqoR$9)lh+U~2kTn8SoYrZZfc8Nbnkpg z=XT?ULs-gP&dcI+ui6YvQwDifx?LXk>cZIH2C=r#*hbOXHC97q^iZJ~y@4gUyHaJ;y}=R<8-_gY`y zNZ=`ld(B$?;c}6>P@=4Ki4WiRPVdylR_b@it>cINVX^pg^8RNxwg3FdGas4mlyf0m zeCBGyo~%j|Cc&Bfd?NdjQM{xi=BwQ&E*6Q%{*3>N2UA}9oeHyXHz*BPY7RRW)5eX& zSnsDXg_UHQuB^&>QhxgqW7S?ZJiLXSrh<08+x-iVal#Z;8Regs2&xkyn1n|id|}3uYc{!m8OS0r)9};MC9o!pM5-gfqcqw zqn*N8#38;XVswNsH~-<{2HrXAl0IeDJ>8V$feIF{%lB`oZjV!~my&nwA}T3U2_L7r zx76%gKFyT`)HUha1&P1Ew?%rD&^oMi*mFQYyS zg@lldUFF_aol_nXMta+6T~|65Z0~4u3!1)8K8;-vK1u1ifExOKeVlvFj%1qnHhp8h zO{NrU#EW;GOuWXsX?%EZ?=DQ*bm$Elo~gRT^RYzUao`;mj5R{f7~&pOzS(BA`EV?b z_;tK}hh67%tyt)l!ie@emr1F{{ej-3^2ar_?g~JS?Mt#q*Fg5j9aIX6pmG$Ec0y0B z71w)-^`|QGQVE#tpC|R7^6NeT&R}CT!DKZx$~?%qHM}0cQfF@q^swY7g*Aw?atK{9 z&YS;~Co*@D{gF3Gd1-ZImzs=s6KRL6u#qon2}DQ(*AuG_Xas%c48n*(FlsA!gL zlO58Bq>^V#R!B)?^EvA(YfnjsUKgfifr-G?~S>^jJxb z38|7|_mp|Dh>vwahqAVg`QsulbNT24;CfB)*f4NCC(-xrsnrw{dl7-0=^C1C1?vEJ zn%?J@Y!07;LIyvf^VcPqHL#W7MhRLmF=zjhvv9Gz%;rhm11aN=V$D4DGu{j^QXA6jOdkk+q;88v@fA0G>d4U!)xqeT@{};Ls^RFk1ED~9O zt*mjZis^M;3WSv2DUkw?&@ zrwf#r)cFdhZ;KH02HgRd@a^=~eO!!D7W|7r*$M?2k@B+YZcQxcmm#MT+RC1ZL9jLe zN%0k=HxNJ1RD#IJRk`KT9Xwe3#sR?9zLD*2gJ5%)x$%JiZpkx10tFlY1^>Jd_Nd(k zW83pF{j=k$1)cLt8&Z}HLsgxFY`qwcPgTn3oEbt4HkQ<1@zr~NNQM=@SmEyN*Lcn> z<6w1b+|6dh-Q)g*w;J@-{vwrkg0H!`>86Ks23ks;wX5GX$81~NvRZ*91Ite^d>wb> z;jMZE%h?aDlr3_oHCHRz(ovYyDAsqK<_scIEnisd%9kWqRyLOeO8T|QppEEYgB}>#Xf9BIPhn0m@~s?2GfK73!Nr&vz{h zQ(j!e?8Go(WVj(Y1?=5xhxJxR`o~z>8NSY*gDTdp##z1Xo`K;e^S+(!|Y zY1+Y6Ydd*$#bCiQ37pF7S3Zz5zjd^(Ov>6FX@ym5fmhRS3&oSB&Q-`i^x2n7)9-)V zx9iuPiO$-B^j@;H;ntC@ICc`E~}===x9%bU+U+@sG1ak$OOr^D0acOd061HsJi zs~AS}wDhxQKu$Tr5~=}67~hjn&@ zx;i;8^}MRe6Qn$_nxQ zjUn4hkiu}OV1hFIC@s^0?z@7Y;83QGtV=_~h=g#%3~ilZSTY=s<2}&5p-8fQPqEb9 zIi^zOiO>acAylDQltnzz13;&~y2J-+UU`X#q`tV;@fEt9M__q-9)EkPr#i@xrBS09 zOSy0;p)sF!ftPAMUkS$06@$&lR>WcoN44BW|EUuNHbGqhVtiG67U9xAE;9yf}9(PWv zl5`+6aC{}n&8;+HEQe8B_cO>R);`dSEXRor~l5NXFBF@!?84oN4HC(6()#%$`)gcN>XE zC9iGExh@7uFKeAFjmix;3w;T!w^SGnrL=BHn}O~1Tz2Jc50fx-8N*T0q3rkGO!bto z`t1uoMEv6wKFS0)7)b%_C5u#q^`S%nXMj9jVqC68pS*#1E1LJYgI!KrXajGINN69ykEn-C;`w zVrM&)z4As7o(%_pm%nJA-2osnb^z2g0CEx%o0ZU=C1Us{pOpE@ zGe1|;Pf74oM*ipR9t8fgv!o=P@%JhGbpXaB|1Zab)uF{)x(7nfY_uDNKU|44O#4hb z5UK@RemvHv=)+Sjqp3)8e+=aV+8Yi+da+OAv<%VHjESfc%w*XdjsRK=shv|WQ}x4r zPYB#K>Hv`B(a!3nUYS1tHu??WB;816-|H|-Dmnl{_@Q&47lJAB~<$d-lk@pfWO;ehnaQ&=z?3kJ|4oihgE`acL2IO4P7V7v;dO)8NcU1z6c`m<^Y(B zIRJPvt-ks6{e#ehgET|tqM^K*Rwv-^93ZK@+_!CLk!>;V*tbGDGV!~@{HMrzJpoTg&uuw?gAZceYZucYlot@@IN zPbPs0Rxx`jwN}Ooe7z&Km#TDjS|%4ZKE{rZ5FlB7@6_bNdZ>4&4}e|#OerNlXBwy;PFs9#NBXI|qQ;-?j86v!*_Kf@K+Ylfj&QE4N( zyNH!DRVd-#CS`Q#<7#N>=hMLy`{{NJB(3D~)XJs3MGK;&2B!YA%q=Beg4b4`-N0^>h3X+WPKYn7OR!$hqUpN|(ymxe z42?kfyAFK5eQ%IjdLuiza>^&*@4EQ#Civ|8-ZJ0-T|32c$|vCO8^ClsY;v}}-mR1j zx_?KCO3f!OnU-Oh6yj&4DKchhfbi5u?(wf49kFle<(D#SPqmgYuxQIJB5l0`>2cAm zw$)0Hw~lcQEP9~9TC{R6d<3-avZ~3lKI+z6(fbec1oPM9#k6|8yu3x0)EC+6{moxn%p&4y*-% z719Ku-#GwkX&=IEn&8gVyV%{u1Sj`Y`R`#RWRTtdH9KY4E-uX57)-9JKOop^YZ?JO z!`xBIv-qfJI~QzN^GxI3X>xdUkbDYl4gsPrs6{A;hKZpFzei8~R)%F`znvw;G&Oo6 z{)Q&|1GMQSC>SiE>&8lG(ckO&@$W(ikAgDMdOu6&7|uY}`OX1oK&NPk0BkM>3Tk>M z;P3W3(Zs-QU~p&1E{<9;!N{BqD6XG_VqGWo`&GRd1cVku!EV1dHRA;Q{R2cHAavjW zmizsw87JU>J;3wI3jeolx-kz12pIK4USwv+^vU$PWb31I{h~=soC39{j)0tlGs`+m zMpojh=oNH1+TrNDT~^F7$YU$MxON%p3;AT&5`p`02{Wy5zSo~eQ*r>PLFbaey-qSUixM!;O^HB;sWN_ zbp)vm4K>TYeH{;`-?hJkfsryls$)=2;zFngJLs+FM93isjv&Ir)Hm?dwKet$*r?PI zbeQ3GbkbkqbY1A@BcfI|IK+uQ-6%;JHK}=!NH?$x3pO9D+?&WR(2TRPKuK~tbgel zSMFxe=TnSd9h+6AVANW!+H;=0tzq}TdEkDb*Og6eqAevmK|Qn`spWjy7a1FN+wt!H zBa#L-HV#gn(~8R%b~l>RJ1b;A?ma2h+;Il?kg^8PHJHQ>7^-Kl3TTGz>_n{^E+>Bl zGA(lC{R^&5f;LSj9f`w7j~%IFk}4l>xE9Fl;5wX|G;D^}g&aYLsl=;>2yfHG)G`lu~cJhfXv9!0I~Yn_|M!UC?^br#=9#@UVX4 zDP?ql$uFW?hHqpCu4K_6D$!vjFvDl+NV1pZd&o)o~pf|1Tan!m z{sGQ{qI4o*qZJaPR(t~f{Q)#wtFuniN=1H_oC#^hhI(JE3z^&22y(JM0M0ED6Ycd1 zir21tfR^4_h+(d^!llX<6LS@LX4a3%qi~iP!E5$!-3Fzvm*Nej*vcRh{46^puBA*2 zm`XA+y#Yf8VTaxo%-^$nkBruT6}dq^5s~m8fGdMU4mkjxlSO{(koeXp)v!7lt^l-? zzAr)qWaL7|+o*RZVPKHz2)s-GotGms!?xFA5ap$-U>xNL{C-To@dPWM*w~H&f?orJ zzxVqwYQ!OiH`2wN57)4yIv?o6p8 z`N>-!iXDEnvrnq?LJ@}ZUE2sS+B_8ri8Jj#0ABAkgc(prx5!;2@uD)`idml7A)8^g z+zVCOfxT~}+K)v@gP6Vk@D;ri0;gN)0LU!^C;uL}!gPK$`TxYV^{>4qK%Qa4ZtlYU z-jxsS?#~`*Q{FNP)>PqLaKlZ<4|XcDZR3cZWriqsu()VyR>`4sQx|F0J?oVUsbq)m+ASw9T!MduE_Ffu4m~Ho`88Ne_!Kb@+nRyZg%qW4 znSzP-@`x`Em44N|La=%<4YV>vI}?VY5OBWjLvzy5QgN~60Pv34XWWy8x*G1SkBok) zCH$?s{&~YEm6S;o-N$FZOn&ss=B|3y31=m6^qf*BkUA{MJfaWv5mH~Y^dm|T91-nH z#phQ#C?65+OVQw0GW=%@pW9l;lTwQ%AW8AMzO*y-2>EZlCnqTJSD4;^k2wiV%dXN< z$kc};GGP^7S%$~+dr~a^cd`TfF|sF_89UcL!8&kEv8*1hdON>Z?uerwoJ?x~&D-C; z9JA&xu@-*p>6votyz;I-799)Oke?myVL!1(h>t@6VCDD!F7Dc*G`%`4a+l2L_)4!| zC^cmD8T4Ega>=cjTp!tv-;VZDuCbOoHPC;X>i{4>0Azc^G$%c2nSur)Y+G4r`bVTY zZxlX~zEivt(`@%Fgl>J&YUu=~k^Zfv46YVo{)IG0qT^e^cJ=rE@Vf#vM-%iQJQlEr z_V8_iev~6#dvwx{#6M!sll#`6w}bq9BIXC5{&eOL60LA$yd>mNch7HFGU{@0U4_mb z+;ph586Dk)vS-Fa!DY>)N%iVo#S*Sv>^tURk z7?H)_v%tP;L%;W%iCJ-b?IEH(|4{Ax3MOKG@8$kW2w_lT%(FQR7@mNCSLyrpbA01= z>HfwL`c_B(XKI?=_I?PW9JZztnRE<(|K*0;`{9W4^fl1-ItG99a*jKSP}Y+8NLL29 zAbkH3Mw<7UHwvt?h`R-|sG1jt*=@-HB5+ z_26vyHEP!nL%Wek=pL^yNSgFR&R7xO_R1~&AbdY;Z004ax@&&T_Sz4`o* z$E=Q3R^$NacXKTzZESzpUnJz^^o&}hno+FfDVWFd({3mD6YX{lVt;meq9-%148+j^ zJB(7x3@fFdAaS-_Xe#f-0F&_k@*yOJ{A_*e)q9^yZ03CyNQ6trpt&|O^d2<{30Ikd zU=A+(6)};#4wjpj`*ni~uJ{n|Tis^NiLn`qrVA?sBhgRlPGr~o(Z>7bDv2BYh#1?7 zJFa|Vp;;b7aJ=~O%Q4`kL zzpOVnDAJw$P6NOQr*|j{#-k;W%HM$BIxDz)D4M>Nzdv0Ff20c`hAx1>hAxos43U`# zKsk+q(?HDGN?8+NGGOaM^x+-kbL`cn($7s(x*EM5+Wif7wv{&@BcESKeo@6oZlxJz zaK23Cbn|MiI?R_4(!HinXrnqL8$AQd-*_Gb@hIOKcn@w;$9{ybJJuiWPe7r5-Klk& zlix+?Qoi5o8H&qd5cf_;an*=^G1SF3wHVPM^%_L|&Rh-V&xdAiM9}A@$>`{vLMoG< z!Ulk|z=E=SGo2T2dkdcx(I&44i_N<(Npx=$BwZ5zX4=I?iZG;?iS1LQ2SC?`?mL?U z;7L%($IXpBSVf2^+@VR9wr_VH?|H$YTk>Q;=VzkipO7f&G|Uh!_tB;ZM}wJuWk`r! zYn0j@Ap#K`j<;A?n20vRqOj8bu!$08jZuH5*?miJqdy{YdyEjW+J?U+HQu+Ho4Hq- zvFi=(P2&-n6L;AUnl=g^MtwjCUPq|rigNeT`rjS$zfKUvKcQZn;P3DET>n!Cy`$5t zH-(q3MoEv|co*B*QNrdPgQ3aSvg&tnQ}AK~ad8I|Y~<193Ev$6A8w})tdxTO>&M|k z^K&6p3#G!l=2Shl!~K1V)n!J!7rV{ZT&2e+Z}_J$&TT1C!DqGE=SQpk+jPs$&fd7L z#C=!Dr`vD#Hq}#BpGc+$&$9wZpZhY)1q?j9$N9?c1#eHIR+Om{(s`G5VfEd~eZ|qf zUbg4)Lac)8xm8a*+;)&T8>B;}H70CknhcVjv;|UR(BU^-?LV1Drxy95xH|T7RS>fj z+4Infauib0$?T6}`8WlfCavr;BzPM^AIQ?nnQi32io>HOTE`u@q)mSN`!AJ0}Eu|NbGe&a>BJlj7-(2IjkOuyBi4YXJOqlO0?n0zf)$tvZYE(t$l zGD@<>Of{ktb<}z7FEhVjo$UVrcOo6v+*sw5Ow-+aCg2F^E!|SAK5ohO-^vcZ!_2_3 zt(sL_;g!enOmv)jBX_oDgZ;z;|BEk$-@*O&7BbRt+$gJLX0sgKWX~VkTbk|JecYGu z-;n}JLtgQ3J9T3o(`hALnZMIJ4Iv|{IRBw(g}emxy^QX;p4X!mT!`8>Qtet1*1Yzp zpfQuoPaR;S1oGVK$voxlc=PYj>Bfnd`b_ewuz%8<>PmQ9dL-F;d-QYBy4Ge*6?I!T z_A3rc%>Qxe+_XBZF#q!YH0fYIeQ=rIS{E9rTA-J%&*n( zKR#FHhI<$@=JQ9Q56v?N4fUnYHre|VCRo-B5!zKee_^j^)SLd^xqW^dvM{ZEM9rmv=w5s z9e*ihpzxd}^Gk9q(8!v4WK*Oz**`+eSMzZ1k$1Jjj6S`wyRl+qm%MfM$Tp$|D(dKY zt^Hg{x`StG;CgdNJa)HA9n%92BGLx_|!6V~* zgr0=C{e>G+7Ya=jkea;9_{aTgD5CL-M|7pkv-;)H+c|j1yF|rNeaj=31y?U)jfdf$ z@2irrvvc6dj8T2*-x0o`qtSM zB8qc!W*uh8u5X1oea?Ra8!@h>j?-Koes(b~6}@67pG~XXu_@Daiu$2!Kh=aDLT-1J zBcYleFNP=Mx4e5_S=o1Yb3~yadUaEo;7s0>BUbG$uFNJy^f)NcDRjg$XF9a!cwOyx zBD)l)1ese2axNpoF1Vg;D5L$nl{<3QlCM9v3PrK9^nPvYh2XId?+QygiSN$%V!fGI zyBExSd7A7&@+UZ-MD?mse6NrtU8Jr9-C0g(3yrT*6S<^i;olf%i+Lz*VGX$W#^i?W+FqI8L>W}UJQHAOt!ys-rixBIk$KPH_kJI zoLQsQ4K+LRO zQe1%a)~VC!(ZdBDY&kA5gkAF+IL25(c6KIJhA)R`u6|y@46QLP7GO;OJolm7wVN7j zOJ(H>R!pD&p8B(Yo<)U%d;rv0deLw4*j(vno*~jsynnUm1vU~|GK=WgR;~~Za{w4a zvglsS-!i7Q(0b6G9vRR#BEt0fZ`tTSb)!E;HzJJ7o4qI9FR4G-1^N3NIoT$3^w7 z(&47RppG-+dqlMmc$+ddDiHTn>*_Vw<2k*>YWjw5P8}I2JtE0oqpcasp9Mee!(8L- zwdl4~>CO=DAWM}vZO7Mx?}Zg{bG^9H(S4xxrVS59_v}kjN!H;>Zl!#v6PVEwrt3$btn&-_-@nX;$N$$^@Q&UBJ#4HjWSC^T zL)Tcj^mJY^o7`JXTgNa{qzmIGSrVf({kG0GbKHSNj^+W<)61t)6-5(Q@d~mcRSN4q ziPPc<8i?yr5A$Uv;uNVFIDti#MKG}IN)XjCc<6STg6!|f?c{HFd<71DhkxDEPjq2W z{)$nzMrT>keH;(GH&wn>D;tS8YJXt_e{6D)PV|>hFOO(VsU3JDDmf7`9+E5fl{|m# z3I9QQob)^vmWSc3d!SQn8F;HMsqBF1>7tzoYs43?I%xT?EwE3*#-)y+L+ANn4*TCu zI))D00Un+77dss|(eOiEM{`fle16rdah&`hrM`}&O>y+Jz{5YwJFzz+7Z{wAYg+d3Pw*Apt(!d{W~J*=B+{sgtCC?IPW@+~B znnlm>J+hLWN!_HjOEAb0RBAJ~@1NCs}T_$+zA@|LNb z_GASV=ETJ;JreA`&z#WTmcASrH!@v;kWwu4V;`&bWbawue`{@-J!e5Q5l5}qh*rh0 z)h5!u9AeeqwcaAaHqu<+W2CT6QCcn{e%HHmvI*47>d44Bfzsi1zaozL!0FuCO^O>&rZSs%n?0`=#KfP#-L~k-f-R{iWcB z!dtylos#-`$YDxxNCK<3D`JgGW`A}=UFPC^iS9WCR_5vt$`XXn5)b_&Uk>?5t$JX>~_wF$aJ@(GAK(S6W8FKgUyc9@ew3G*(iYOZj$(&}Q zp4V-an(SkjyKn$R>E23#Ub!9JTl})L@Vhd%re{##$-DkA9rq+cx0`SDgd$OOsLOIUacV! z^SYGk=Dmo*yAhAQ)3eE@BCKn4td^Dy@^{UzTWC9}-^pI_4%lBOGCb3sA=`@}FWFN? zc~B9~dTClxi$g|T)+s>r=`JCa zd8_&m0_QX0#@uS2j%>>c6tW6h&K0RWAI=#5oLCA| zB#_HU3-uzCI@Q7+=N^vZtL=)3B7NtDYnaqx)Af5k!D;@2gSdQq>?+VRa@L!~MC}<3 z8D1}2<+JXZn++}wmDx06kH>E2C}K|U`ZWbj)O_Uc$}z7{W{HxNPIGe=2*ROEpCmnx z^lDbYI&X#TIcti%HCU`g-S_JJUEJ4^Er&_R#oX1RtjyPa-;OL^iE)CD4tvYp)wNCo}?Yaun z@&h39Wl(46d^L;==&7tJei87lD(-?oLx^GGlD6Rh@orR0k~Th=Jr;r(sdR8G zG(6Hj(>v+;v6H09RmAJ=qBZeCe@%5`1Zh_es4BF@u$+{Ge}Eiy>9bgW$PH#FoaPV>xs^A&Bd*1eLbo6yh07vb~w;Aso%xu?0V z6m~X;(OcRYE)BkOlWv+r5T+{6PpNJPw-mw+*Q@14QZ7NU^tYf%GdsJ4IoxydoP*-V z$XDVvgB3wN0WA5P4pdW*Y~oItSYkMIQgD0?+5E8?Q(wIxKU1Ust!?wB!FF03&OPfJ zVOE+6YmMEuO?$2H)g{ly6e)8)y_xs1)W8z*XrCv2J6ox|KE!wmAWbj#vbwfnn zvIUcSpb}AW02rh=$yzmQL-ir~SL>_zx5mdJQlfAuwi=h2=~js1XfCbs-*i{P5@^a7 z$d~MBixs^f+i$Y#^*XGq+1;#Jl>-lQ}`|<`!1$I?3r!>*quRks(AKCegwWA83 zoMvKB;qIAez0?&aWs!VS<}72sjJ`5~>9b&fTIW^xYhtpeL3X3rc136G`@%jODWWXQm`BqOQVkH1U9pCePng>nK+NzXk6{`XDy zwm&*I{p&XKGb{W{&C1DQ@{{oI7qq09t6gb(r7kJM0BD>-Fa=vi4~P|qtCdj@g**zM zoeFuIclqh&HCcz?$({4aGPRU4In4Qlx?YL>)630S`oWoYTg33GX(s&9xdp9;L$gqTsgfJjs$LW zt?gw5#^@o{gx$TBvre1ZUQ9C2ZN`%V_P1IZNyY-LPI6swgA`0em9G z>o#6pp|clq8-!R!BzB8<gA??}5>z}_L@xYA#5H zNLx1jY4aL;)Rt2zd#rij6otlS43a2YS(IW;Wm$2sz}e+i`%XzSb2MdwxGW`f3=3*^ zl<-&_wSg0UcJnNSkgztpX6y>O*VzLaZ3;Yp)V!NeX? zceDZ+6>ta$FVu;&7EvXLn_z_CiQ8;7HW-%=Drz|gc4JayU($mX zR@bBzIk#fB$hOozkQrGTly#Gy8U)(`I_=4HBhGeJ!if!Ax@R_(Quj6UO?Sb9l;?4q zti}dYDxqISVS7;uj#4yK<6Cmec~74Aa5KM3GM}9PWi!tsFve)DTm*Xnh)TLJ%UVa7 zegyO1F($#?_SVg>Tkfxa{}~N%Oy?@%Cd19PG)gYHoZ=9E`Et<1Q{AI3DBX$5o}a=c z@wDu(%1Q`J8I7J`w$lfm z0snrFkiTE}wQUsAHyu4Qz+M9RKIYzCu4f%ZUBgaw~|MHMBcC4w?K>ulriv+9XowQkb`0>@stw zSx1Zgj7lC~2)a$f2zVnGPOZ=2LkjH*@_nWF4bxZ?a12##*Z|ps=Zzuhm#=n?Eln6^7^LND zTabAL>^#Pc;Zx;q@(iB~?^@1f@_i>hy2Fm?WxK1}hLELZyReO0oNH%|mU2Z%ON9V2 z%LaBv0CI|#8-1hBDMVk4eyKnrcfKf)+oxvPI52jrU8g%ukyCe|SvVbwjM#V@Xik|g zCadfq=#qOuHg1SRZshtRiM1)L=7!#j+FM{IKZVZ4hiUrFnOD;+H>0SzWSk?XL)wSw zYuiP3@@@_^tq#u0Q`S7Io*bZHo*ZWl4_sN&Ro?a zN5|oiyMG#r3d^T0bQe%YKq_iB4}kp3;f5966BfceHdBCo-%I2IhOSDo=%#x6277iM z0bS*4@Et%e@&2YG&<4>289ye(mjs*sBCV%u_y>*fNV#pZOCZb~^G#c19P=A?VXKFA zK+^)gzfvBK%;~>>|EDMuZ+h1&;j+4_=1{b%%OzXBo*EIRH^f!!r`>7!rxgt%@+ANo zbo`7b5AK|9kVF!#XCPE~J522NaWv?cRyG+W#ofm2;*)KX?$Yhi!}ga05ZeS^VIv54 zi38vc_LtX|M}kq1I`+L!%A0b~ol#==@|PAFg`hvE9TT?XBlVFLY=C#)aI5x9i;QNw z13;Jt?9zfpZ34D1ipPhJAb)9+@t-)lvaYUJ?~}*}oU)hb5~uu&Pjq@Ze)^CjRKK)2 zIUNzjRSKB2M!BS2})aqH;u=|Af)i%S(K0E;66GasMW!coQM} z(L*OK9T#(hFR=fL!G9qqe8a`qg#%)AcQK7g9r4}>?8wU&r3J z#~L-&G!)CCN4>oXhuIIQwXKtNZ4_&5=&lIZcc9)bx(C*%eCJM>AOLtXN*8*zU}Y( zcIAV1oexd1AAze@Z94o&`uv_~Zs``5N}K8QP?4X~JLiLn;u${U)3uTjJC5cdM2zi} zv4G}y035My)|Oq`hpUsr@MzQxdvx#k;%8It4>b+|WrEuxK`2&oS=pJ>5|4Z*qKPJL>6IoCh0StTK zFckTZtuBsqwV$DnL!T6*RYl=U0CH|_!{>(jrw@h)*|yFTF|fT&1O&r`a8oqUea4=d zbS&lDeU9Amn|)MYbN`|%DH_N78s+B668QVpOzqx6jOc#kEK&3_fvV zqLRMlgU+jD9-`(^B<79cr}+43fmL0fg7j^yTPB65nPIw*+3ei(S{c>Z3OyW_7zA7b zxhROSW#)YH-A0)`7j9d(JbjH_V}eYcdGm?FhqD5v=kwHQm5bJg=2`}J4`25IHfEvj zn*p5z2u!dO-gYFV<6zx|Zkl+Kh!3+^XPapvO$V=;WC(@&kdV+O=J}Ql;V0B-6hiAH zqiwP)GOk)Wuf9YZHP-6qxggf=N*5;6d@)Zhb51A6bbd^ptf_ivQMgXziZgYPRc1Mx zO=x&V(vF+i@bfHfOC^sAzjyZRHcgScxarF|b_@E}I^)+~?wu|GJy9(U6Cvt~v+h~E zmTBV7CS418E(=l+p6bWoMyIwf~s3McOWgHNub*Dgi~3RRz-}`6Gy_f53e?rbyluY z;6M5lWGvpHY>zRjn3R&3bl5jDV^**&A(hQ8FN}BoSqpViur=SFs!or4C-(tRPzU`y zx#cUBA>KaKU@W*C@Ofxc2R7F|Ou#_jTDZ7w*!R4WrQ;QTn8#WEzHA`?W6b)jA_4vD z2b9=tHnK|g`sg2??v^I|3`uL*h z>ISp~JiK(e`i1_Y7>j~_gc#sL=SnFJzH{z`{qZ(RD|o6NL*->jS(*0Dx3!N@p9vG5 ze}aS|&|Ld!GE-)pS?#4vmm(~pM2_x#V?k7@bZqj=J9`Qw_8JN=GyI0>YEI65)JfPPvPp+4ik;yG-c%d-ZuP-6N&G4#PUj8~xGdE^_ zlwzk#Wbm+kr{HTF8mTv0_dW$p5cZ?ndrWg_^bVVh$n8)CND!!y{K{_z`p;+ulNMdE zMPwQPR>CVbPO_k_ZJ4R^e4UF8%F=@+Y?+vBqr1ej$D%A`zDk&ss-i3Ltm27ok)5*M z0kBV>$-eI~y_?&V%vwUcb}?ij&f9K#b9sOfvA!i+wlCP@NxIQB>9_E_%P?FJn>jjq z?)9h>*K{w|982IkNUFJfhW->W6xdgdM zzfJB5W`3~8(g$`4#UF;7oRPF_9wkk;X_6@kf>nFC(d8Zh!LiCM@^2WmMw4*}@fp^`TP~=XV*$N-T0xFUbsF@8wX|kiO-RqI@6p z4c!LA)e2ua5i?bPhUX>V#dtbUV9hQB!bQHOxfO_C5U2Zuo6-k40I*I^rmk*#DtJ5v zb5VNQj1M(ovE{*CAG|TjD{=*W^Y0T5ootG|V|oA#_J?W*5e=V#pr31UjdM z&gr|SH3|e(`!;8QuJ3kc^`Ve#XF|M>cf_-yO+$5-ykLY*H*Pg_5sonmHq~(5RC<*2 zDVU^bImt&=c|IL7CO1)JI7!ui20`F$r-?A^oivLZh4WpvSP| z^6x(rNSKrp$RHp;tZ-P-Rd}Q)fj%wF6Yo6wvG*VSnGP|q?-blGfjMe6E)%?8iXh@`8j@5wbaM5HOHXlvflfXR{fJzeA6%`L?aoHEf60jkN7D zlj1ok`+mBYAKO-{^ZU&lqwftASy6`kf-gx=OTk>2pE+4X*K>!1*uQ{DH!p{Le#Dyh z;pdw%`^ln1i3?M<^Ly&2*_K}IE>!s~%$pI}u+CNw#n(Ps)m zFQIE^ziBfc4hDs@DNQ)Ba>qlR9xBz_znV~;Lx+%CoU=4m^4&-)wZ3QdfZ0+%UWrp( ziEVN`)+bP-I%uILHT{*R$)ud;qt~GVDwR(I@A)E82WjjkX+)m`9rq7@&rg2%Jz?#3 zhIUD%H`{*xf-z<3Cf_ey7Z;{PHKEm8{%e7@`u#MV9vFQ&@9DZ{2&lckS8}x{&Y){<$U{qEDNgRUV>2o@ppNh=|Fg>lbvNu2D(p zp-Ypyp1o0f%FhgA+mkHs7X6A@9DQ+v|1A6cXp->(FLayswneS*GbBQ;SyPs6lDI|G z_|DqJ%$3@-)uj)HF1l1l4?bc;HJlSJA!XByzgJA39>U$X9B$RSS=vRPO_E-U&m@Ph z+dWPp} z8_GKvd#2`AMG-l2tlFAbbV`^qd7EmS55~LtK6nRftXf0=TwTS3=eLmJNgfB#(mL={ zcuD8jpy>4EY8n=;pgdmW&gis9+^qR*(PHcN_y&P_`17?B0k)gX;N){&W+(u)rX4yli{ouWf#%#grWk=ZSh1`UVTt83tx#IS##iep` z{5HwWU~Ilm#2%eV=aX%fjk`DPhZ!ZknM@P5;WjO_4_wm21`G44oQV{S!0Jbe3FDfi z{g#-rTsvn_)Sw^ntCBRO0uyhUpZ9t}NH#VQ=AYNMX}y*AiCP;?vcD!8=BZRnt=+e8 zRc`tr-eQYQS$iRWdTy&&7c_P4un;`t?lIR4Dst0D8M)!HdmNt9Zaj=Rt}ABut6iia zDA7Sx=A#>PnNr)d)HCYyR#4_WzXk9eBb4(g|$uy8KmGnL&Tl8hcfD4-eydHhYl_!AmZ|CW^~<4Wx|yl*2GoyPsgFh-Kt# zWGr!KE)n(4Pz{0XZgQHbVngrJ`S@R6ou6>hiwfA+r`WxYBH^zA(=Cf1S>RoyVIkix zXyb^Fog$1%bstda5{6{9i-u3m$_uv(HBba#w_QO6@Z-u}37lUU5tdC=)y;){g2wD) zEEuIGY~=5wWBpd=+o&Y=J$OU@KY0ZEc`ECI<$KtPF-L?lVhISYv7 zP!u`mOi@%7bAA2Zx4!pg`gO0>(=**Of3S+;ZuU9%+;h+V?R|b{@8%?P*$C{1iKPrT+i>%&l0^Q`s%ePrrKy>QFyNAT= zkxYsyMnX-D|w|j;zq3vizozMFzSty*zV)wwWrYeNhF(Wk* zpCo7J^&{Y_lv41f#Z@Z@Ve!c%Z-?N|hy+-7W%!#a>f3w4KQHd(QWhw}`wS@jckPGE z35-+D^&(gSOtXAziFT4e7F@Xgg@4SjpKGVDKrrJ21k?00DE^a%%PyHuVkF?Cy)!g` zuAG=mSf1VHj!|2@!G>Uc4sY=Fc7R-ssqUd|j>jMahu5G`asl}&C_Tbe&fig~22lTX z_HU5M&*pOvO!hFTkwc>rfJ_ebMAnmtT;lY5DyWxoZ&d z3GsZ(z!4DrqQ;PHt^&AUf>Edb)S{*XV=;oRE>i)D*|bo*)6ZvakEi`5B@i>Mh){8S2(Tc$87iwzdLNJM;jKT0YrE1gjPw=p{ zZu%J$#w;{6T=O2>3^3{5g)Jk?>~Uwhx*h=0b=X@u9@woJL8la}0WmSdk!YyP{M}Z> z(R!RMlKBGq(U*CmwG|ak9AcvC=nmI4Rj=GN?ZYp88ob1=3-@JHW&rkSrd|wP;-J}; zG574Ah%~3}1A{f@-H*{PdT-reUn3qxK^D7)1^%=iXRnou#+e@5j(+SQ&ULRRBafhl zRy<8&qc=KKqt}Em-i2Ekq=bTEH54DzGyWd2cvUYjW?>yZt z?nU!P_sOHry6F$&hJ;X1JMuA~Vh2Vo1paJJUM>s$9h3@_ch1_dixihb>fd)}8w?dH5D|QxA<6O-_^{oPs-$Mx zyDtzzIo=pKc6fCzRK%rzM3+|3zuMH}8|1{f*K@%-hEuSj4S4I-_F5T^8r)sbwaI}s zdYRtDQ^0;GmXEH?ytJbcH~UU$e#V*zaHg|5>h&_OL7tbb@ny_ZP||h(MOLgx1g?s>hEDLhfn?D}#0`AxSn zUz$ZxJmpq+lW`@*9-Vsd^|PmVZqEa%q|79!{nO0l`5pZ$KhIF@>FnH}9oecWKK z(sj`r4jc}LouXOh4LHgAjukd7-iI>l^x=$EyGf5|rzQchcO5NepA zsx_0Av@bzqnNcr!iKG`zr;zLQ$+OVls7H>}sn?~WnY<%_a|8$=;oF>hrl{cllKtK_ z!pCoWdp7^WT?GnCTAwM$YI!Lzn_;G{an=HCm8P0p=K|U;at%VL`hjQ9;zn06z*1y3*2dw7Px2*hZ zfCctjZPqd7;eggL|=ibNY&H4N_RvM>Vg8rQ$pM0|VY~QcfAj4}=8f}4{E7TAgzlfYgBoIFB z=va}ryT!lt)NpoR9(V3CTEV#PQc_cTXP7ZxKF`prOlQQ(njfMJpJL9OKN6qLiXc+G zMID$D3*ZBGIG0&fUA4tHW5nWihnZkpfK95A;Gs?Oj&*z<1PhxRwAH^{_#?64$4c4R z2|1@pfP+1_EPlDt3*6<>Z^<-Iowi|q)Ha$WEU9$WP0EeT*B}iz98=1x)BPt58_1xz!7%K!c z9(YbRh>xd-r_XkLfReN4aS_8R_;db_cnCl--6*)PmHvd-ifbL<{Loy3U?_DT>&f69 zcX^n&Hgx^|E6LiM!1Fqfy3||2yxd(l0e7Nep?fCm;1i;;-bs(#JvvSaj;i*vBw(7v z*|sJ@O}Wn!bhyL>X~&)`p@B%bvk>(X~LY*eUZBmSM|9^06c zZh$=)iu{>IBU?(ZPJk~m{GR1;8Osp2S1QAl2lh=@^?@L`^zKWv zpdPRusDYs-lWCX*9`s}2o8&Yat)>ILC>=&z_c{MHDCuq-j+bw9=*oe37D`8(UA`k7j9w;z1S%=MX2EeVsqt7y*Uv-=Fep+yu@9knp~l7vRnCv7#1c85AuYHg&zSy&X=8yQaUKEY zzWmiQ6A6BWsU43man*=-r(^bdJCZi81NBDnyEad=ELuHAu};IGAqq8zT<}nyQw;-2 z&j?~=V~KzYo?J;5334UV!uR9w*uEJM(Ocqt&R3TK%o0{(dYiG z-d!Gy*Ox9oE=x?QG0&}i3mGV{t5z1tW_&c%24d!+H+)>^URbi)a-2j}X5CsBbp_GQ z!btEdl0kiz)jy&{M^+KBD+h1`gE*umWqcR_-U(L-Z-{O`0etFJHZVekpAS>It_YqB z1Twhf?(mbtOntbvs~4uBu0Z|tcLCn5D^~{1DHG-|sj}L%wPVI`-SvO#%j#8&d?iRQYIczZc5682fNDocvO| zIvALu<|g&sA~`l;5qt4wga~bRd+ngeAEoyXDgvUj8<6K5!f>V)*$Cv zq{bfGHHdXNct#Vb_?#6i^tG5C@w_AsIrK~`5q*^_k9ydk2$TeS3XE<508DyfeHQv! z03?Og|Ezcc4j>h{mG;#j`L^wYmMXPYcYL%3c7?H;P=0J=(32VI8(BoV$xE4>YG45yOTejN4!vG~Ox7ZIGg6z5n$UJr)Oab?q3jb$nAvZqG#Xt|vK9LR4iL$IpYEHUJ4 zvbWT>uAv$6=?SDca{O)A?KPeIPGT8NbBNd-ghwBDJdX^|#nLt-?#Ph!==ksi^?12Z z*Ikko?E~O@L&|>y`TzL+zXRy+Lo{CTZ?mJD*Zfm<`TWy^M62V1hB3(nSZOLvwTG z0UCJ?+R-=%1KT*il*TY<=vh%fq%Q3saXBIj6QrAUu>&yV<)=}=Dv2q1i@|}$c`0{g`PB#bZ8*BZM?T*4yOU0Y zpcz-+))gYJm}&QQ`7)j27Dg+c+7CZ@oe!5ATVcZ%59jQ+~{ zG_1X$edk-@H7Kwaq30+|*CPq_1-f@- zV3dj{zvem5KIQ^retxpsvWAhr4L~j0qaD963@-Gydx+Httcbv!9+Y8v8XKPY-tvSZ zfN9JPh_@b60JqZ-hh+>M9*154@t>EfbU5{Z{6|0@aMHT;oMq`c=zjNx8`*_wJ-m;R zzX#}N?nR+k(WhWUq$xt$3Xp#X(3pX4QkTv~O1u;Q-5X@L8dp6ue#0;>=uDr|u%Bd0 zpW(Is?j9-Uj ztqS$GC9Tf=_nN0Zz$}iL^``xah;OD$ET7s}15keKY8q3{3^3l#F)4Gg`d*JTMwh}y z&A{gw;$DKzqx;UM4ZWDL(w0cc66MJy3vXBj(mRc!{#Cj4SLkDwVAs; z-iS9@1mzioc5pe*AD^P#0b{k|2JPVF@=dF}1~~)E$1;0CPF#@&MjbifxGoZhkTq?Z z0yJ^~!wb0Cx3c%A@OHko0;zM2CCL1Bx`-pZY2Q)keT2_)g>I-Oi zmxYpbJJmQd1pf%$0f0b8dN1k`w`Z;S**IX@maPFn8Cdfd=pIH5!_wgM7+FHRYU<=~ z+j`R41l$gB)xoQR9VpN?DPR&OB~GV@Co!gfmUO&_IykTbJd#@uSqnN0 z0G7wHT|RLC>KNIJhyHed(T?{OC7HZ@+HYpR_t1>*UVN70uf38+D8WoSKI^$ebci;gSQ&M44XWC<{C@j_KSLxSCJtR-=%%}!{-B>iw3_bN zk~wxsaxEqY&CGZWvM3ZU?HMeQelAcfI6fE4a7pk(PRFHl#o zYtZ5|=+XVxk+0ajQx;AR9Dx^?@b}YPH~sVff5xBxlQ`MmiRz?2#{Q?OK_!1yCjWQ6 zFvxlPpV;u1fVtNO;!)5frIld^#5Kr#HtWw_Q9$Ou^SF6=P?9{NO4$DCxp}((?n)WG zVdm}S`ass2B$i0oI|*{_NrC&fwjX2D^Il>6e@~l-t%ykRfrHD7T40Uh5m189+w9Gbvf(!Xzrc_iSj5QJzBRLoI z@R;-GIULhmw@AG2tA+aWoO@DX+u+9s>l8p2yTCzaF<;b2H?DHQLQA@v|5ab@|7&Z? zKWq5gn60kh)caW@6Iiz^82QyANoN%Z(kb?%9%Jj@OJJa#HCfrjR10xT zJywoN7%hbjqzP7ge}8M%0|XvEwc#z`W=2N->xZiUp(5mldq3ym5lg(#bScv(W)_3R z@&uLgkE`kg-{ND2#>A~8&(!3D9QQay=Vpy$Ip3wIxw5zYh^CDdtf%nr3SG=hUj$qi zP{+S;2ydSkxD=R**sp*PVz_)lM`fa6*#;yGz zayBP6@L$b!4R9I$7-}~5^4pakjb+z6%pG~b9R1DCeY04c@i06APBN@^25+|v@ou_8 zOTsVt;uREb8z+pqydP)Gy?!p8aLEb}JHS06m{%K^i?)4RM*CCdv-Ob=G}>NqE6@_j zngXTe{z>J_Kdu!N`KQ~sQo}sz-hp1&IgF9G@Vrx3Y;Rw5jy^B?plQ3^X~_3;zh&lW za2|p68G&G!ik&xwO~Q{+!SS0nW$JZ=uTXAFA)e=fA?Xj7kM-s1!)bPfw=xl1nj<>U z9VDieL?-fa9(T@fO7B1UDbBiQnjzUE@~R?v-@`wiVlVUMZHA!TyASYh`rJuhX|~f) zEDCII4iEa_FZp8>so`m7NBCjbdc`qW;j-Tpz6TbhruFJCJ&t5cs{?#}g}=?7fAzDz zIbfMmz|LP%nZ2A^Tx-~q@I%GT>8p60N12{?(icH$Wwgv>?l@_g%hXxq(r+g9oS5_G zoig6!!^DT^e)+XEdW5{X&1A9YMsxZ>Ltbk84|dA`L=F94rKtVS{oDN;d&ASGS5X;h zsE?^C^*y_HR=^=@I_?S<^&UPia-A&eJiciiKFH72yOTri6rogAl54 zv)TlErN?Fqv_oS9b&QNnO2*+*Fy|h;7l&-yHm-J}&1Y~-`uT&Y2RHk3K8@t1F=h0c z^S;Gu+Yryp*f$h%1n-=(h#3Psi^&Zshm!72DXZ`o<(DyZM|CXDjkM11y}u2Y+VJ!{ z(xm1mG>;ZRii{ZeZfe|Ozqcpu;6cfq7GK%7!8V z1RGlRuyukVmQ&SO6wRokB0OvMsE5?td&P;7@`kF&u8ljUwQMVTmtOmO>=DOmOO0}h zeG$%!?CBL@g7m~7;$Net+p`|YU){y7AUqEr5_S#H?Yj3U3|Vc7TGV7dn&e^*yY0Hl zp;>+-Lsy(;C9|cPaF@Ks2VHoG&eBJVX8B$qK!kxE2OX-?`9V>5wW^=r|jRe*phrL~UhefFznF|$Cht~uh z;nU{v^Od{&1Tdz`nUP`++83y483?Qqh&Q64Yx0f?93EtgDGNOlIfa|d z?>sn0M{m`TkQ06k-4$VEvT_N|3^4gnmJbAGcN}}aBVkBPt>|O-J(g{;(8}?Wr!9ja z>vvTv$Z*DSdX<7Yjl!;OjsmgJ`sxb1kA8=A(iGal41T@eEpd^^*Wb(BN}fSN@4AJJ zw)7Va-!xH}uJ<~}Rf!Hz_Z~3MIoh)A7&#L>%*>_1KR=(0HU1a}uax`jK*?)vCxJ*>ywlSV{qST0WCz;Siz~mLu-TN85toL7gjhy$?JD2uk*tbf`$aRuu z&iK5_Y$r3-c=2>!Eh^pGbk4-1ah90?68xBMAPBe6KEL|j{(+)wJG-y5Do$Enp&}FMWu;g@C3cb$m8Klr>+T%BUg;-pY6=%n~{`4gJB>t;ff$jiU zj2APQd}=ZTKcC+-oo!zA=5nZf5+6{M9$8duD;k?clQ3gb4wuGdu`z*^u?zRlzGb)k zF1AM#>$3t@tQzNNdd6mGI1jOL5w93baU@{<*mZa0&8rnw=ZA=mx1t)$AroAskH&IF z_e=Rdk-N_&SIE$rK*yKhP6dcK5gMYrkP+e{rU?{;D9y3}{>+Z~hw@)hWw9i?dysmz5K$b^ael{(W z(0tju(N=ure+%N3&l)-IA4$t;|H#9vTfz1CSlNHh#b*pUwF0=|2P81$F|f&R&pFXJ z_(*XDIN~7{*#0W)?eyuzw@SNN*(L&hWXT5&(NF`rCG77a?YNm?c*g>R)<}nHARzyG zaMm0yx}|KzNh{NjBcDU?ELjfr(1Y0rd2d*W zF3lU$y4rG72Il&;y*R;tmDXET(^8YVE--i4%Uk(%CE@cbk^F|vDW{CFysA~BHss^$ zgxf(+2Mu5A9=L|4{FaMp0=bxdW|H9X%+#&@Nlk*NzW3$H@HnAMjrzk11L_Wx>xeZJGMVZcO9@3Zujtw(~aXn$Tz>PWm0g? zFMDe~)=+MWh3V*(0&QxJo$U~+zPw+gN}D(+Em8daz(mA&TZl;h#Ti`{-lyC@p4xq? zIhm7wY^4|uGxBI|j?bq|9zTiKq)0Smd2MoQL-I3aWBDmxo4>t5jjgg7Hlo8J0HM{O zEPkr#6h>He492fj;?>lPYGBj&V4PRn5gCL;!c-~~wvm>h6upB(NZ{dbNbLPt)pdORSW&0E6R8J}zcqvw*yOiW%>Pgqdnc=H zRI3UD#>(Qm++$sqkPAt~y5m3VZ8~N-+ zMsfl&76^Yqa&cL(shR3jMS5{hhRyXhL0_KX>KJ&$KmwwDE2q$dV$EAk`dET2E! zy{~PN-~{@7Bj*!Y`7plnohT-U?D&z2VqzI$XYDrvXfe&Y<2PRto`pVMzx_Z2JDns} zn?3p((cul@wumggs6Blbct<8hcM3_wH=ld)V{$UBQ2;#OebQ)z!}A7-i&% z6I_=|5PZ+1tEKSc8w}#|jik;@nlOmaC{F)-Qt>;XC#T)T41<;%VV{B#>mPce6 z?O9X9yGcbB2ZrYf&u0U4-t38N{T#d0@!j+2RBM3P%-gV9EUZBwiele3s3UudwXL#+ zhb2E?Wy{JtitFi)%t0FZ$-dFj`fxKcjDPrRP9AyjLzvWw(GQ@DoIpioZQsPvw@iN- zgTkSrX~=xPowHlKG8y)=8sC~ubh&zW9If=hbL0m>WPV0e(I-dTP(gLUn75>v{+rlq zWV-I5tp&DvbSfW46`Z?tFLv zFUr8fVYuvm+*dDAy9`O|oGEt^+LSfrsF=Rqkx)@REm=FNv5?}yd?ivio$Jy9_V)V) z@!AWLt<7&T)m5Fo7)D`w!>zCoi(ynW)i4G(?hv4VjecRvTl09%J-%2RtwwG*?7kGS zQWH|D)TAlS#ygs$P`r)jqD?1u`i%#XyBEMXws)GLrqWW?;6VbDrM!}D2p@1TZ;kti z6%-6GfwrwnE!^CM%kZr)L}MfJhkU&W3=HS`oe7&f)=a(P+ve8#7%b9n(AJ;jIdV=d zt!;!ET2Kw#E=g|7THxa=*CZ{}q!8RSr`fo_sjaapSX#e(ul4K zuV5DqTv)@4JySM1Qi+)l;W{V!nkad1BVnkiCx~#H$u)ZSp(3NOIEg5YyPG<5j&Pti z71SQuA@R+~#Aam6@nPDYy;Hdn$caSvm1}JR)(Yq-Q(2jkWs1MZ#$81tyLohWeJudS zaKfZTcZ*?7@^VQqI1m;L8HtJ@w5Pj?^R{N(#F(oy>+t=y-h*TkX`w79hfM5~Cs_B_sP+9C zhnXzO1k5{+Y`%M~@0Mn}G9}JJ25ipL`h|=NiIbP8*Y+>c%N@0*q&2DbH#Cb-AHnSOF`8|&67 zaL_An_i}AFj3!a$M2{oA*{fxX+xT4PqN9EFp3#TSivW8uwE|YNZF_dn!Mk21KiEa} z+4ysEQkHG>Xbp?g(@;dd-Nj}qZ094vCIs?{M@ zST*%xg5s)-`rVHmIhJu!z6pvbHsu)2UelwS@7y3?%VeuY)kFe2!__;sg;w?x^g+ZD z{)F!Q>a;|TrgbV}T}a8?fagAbisEWwPVf7!JSyoTZ{4XEcRkP%u=iALnc%?Lo>vWj zUc@;^&UwdC7+*ttjWL>BT=2FLog8~v&Rby(S8u_%moz>h8#Avbo1%QLSm^|^Dl-q7&akijRN_GdQK&fWdh(^AUQh}BjXsmQe)5$ z{j4+JsK)@2;imUUibil3)q0uzOMm8xtaBy%D~{c}{Vl3GAC7&nrT2ojjAO`B8*0mz z`F9V{ZX(0D%@Kz>@fCG;A0TcgVVPqe{DWQ067=ZjD0=hFdJBiYJYxm(al){ z9M1K5_Va35jTCG*rEqZ!JW{4SN&S;)RL`cXo{L;W$Ck?D-JL_h19_de4H+u3N}>6)%I z{FXNJd^-&VEBZ1o)pM_9U$Mrh$rnK>yg_pQx7&)Sz#$IyRfn}yN${%Z?r4wECB4(t zDZrXq8Q2h7xvpm@AyJlC;oF`UjD!*|Kzd^G>qv**j1~n7^47TxOHjX;e=&BzQPRXG6grdl zq+puAe8`mxiy#{-{Zt*&io>L5EKAJ}g@i%U_apdy!Xm5fmHH=rpD@&hiiQ8wl+$`> zUo!G`{-(TN?1agJR^>00P6?sQHZ`fok+QwOU?7QpmunA zc966ZLzQQGP33jka-%O}mldd<^8^)_JFl1fw}t9%-ejBG^8UK>2&UfHRKuRbbS!J{ z^ea$snZPfPCf0ZH;^z$yZB0A1ol4V^UVmum=1H)tzg_m`#foN2!iwf-DN~)zQ;Ncu zj?GGCaG6_+U-70`Q(Hf8-+o$>cMY=KPBDz?w?6pBltRdr>=D!WK3xu0f&I3ABUX6V z(OwhTu{`=}fVNA8^uGApPCPo)SV_s`sL11?+iiX;^A! z={BjcHoqH8kcg_&p+Tyg!zT#$m&Zpg8)&ILtV(QM8Z`8>`9xT4w@{=C;Z;fExg zqfy@~`7(A?&gD}ty=UYp)sy?A<+g!WTi~abuko$qXFD^rVWxd=2mNM!Z<{bGKZHDHJv+EY0}VbW=2| zBfqkxdfYywJ7iFgySxn`C-7~>&wqb-4NA!O1h#Fyww{`gdU*mAdNSw~1d0az!c+Uh y*CkW`FBfUhm@{g(!BKJNcY0n2XNtWq@9-tBRBV|QEJ_<(gi^FIJEQE-v~ literal 0 HcmV?d00001 From fb092a6ec8ca3042c3384fe164203fff62be9b0f Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Sun, 18 Sep 2022 22:22:35 -0400 Subject: [PATCH 6/7] upd readMe --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f010b0c..110bf7c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ CUDA Stream Compaction This Project involves: * CPU version of scan -* COU version of scan without using scan +* CPU version of scan without using scan * CPU version of compact with scan * GPU version of naive scan * GPU version of work-efficient scan From 0013456960a01f8e29aeab834d756191e3ec3c19 Mon Sep 17 00:00:00 2001 From: HanLinSun <49753678+HanLinSun@users.noreply.github.com> Date: Mon, 19 Sep 2022 16:13:26 -0400 Subject: [PATCH 7/7] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 110bf7c..91a1a85 100644 --- a/README.md +++ b/README.md @@ -101,4 +101,5 @@ That's why it is way more faster. ## String Compaction -![Compact Chart](img/Analysis%202.JPG) \ No newline at end of file +![Compact Chart](img/Analysis%202.JPG) +In this chart, the lower the attribute is, the better its performance.