Skip to content

Commit 671710a

Browse files
Merge pull request #4 from AndrewBoessen/global
2 parents b2cc210 + f784a13 commit 671710a

File tree

3 files changed

+89
-3
lines changed

3 files changed

+89
-3
lines changed

.github/workflows/makefile.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
- name: Build with Makefile
3636
run: |
3737
# Make sure the Makefile exists and then build the project
38-
if [ -f makefile ]; then
38+
if [ -f Makefile ]; then
3939
make
4040
else
4141
echo "Makefile not found!"

makefile renamed to Makefile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@ CUDA_PATH = /opt/cuda
66
INCLUDES = -I$(CUDA_PATH)/include
77
LDFLAGS = -L$(CUDA_PATH)/lib64 -lcudart
88

9-
all: cpu_bitonic_sort warp_bitonic_sort smem_bitonic_sort
9+
all: cpu_bitonic_sort warp_bitonic_sort smem_bitonic_sort global_bitonic_sort
1010

1111
warp_bitonic_sort: main.o warp_bitonic_sort.o
1212
$(CXX) $^ -o $@ $(LDFLAGS)
1313

1414
smem_bitonic_sort: main.o smem_bitonic_sort.o
1515
$(CXX) $^ -o $@ $(LDFLAGS)
1616

17+
global_bitonic_sort: main.o global_bitonic_sort.o
18+
$(CXX) $^ -o $@ $(LDFLAGS)
19+
1720
cpu_bitonic_sort: cpu_bitonic_sort.cpp
1821
$(CXX) $^ -o $@
1922

@@ -26,5 +29,8 @@ warp_bitonic_sort.o: warp_bitonic_sort.cu bitonic_sort.cuh
2629
smem_bitonic_sort.o: smem_bitonic_sort.cu bitonic_sort.cuh
2730
$(NVCC) $(NVCCFLAGS) -c $< -o $@
2831

32+
global_bitonic_sort.o: global_bitonic_sort.cu bitonic_sort.cuh
33+
$(NVCC) $(NVCCFLAGS) -c $< -o $@
34+
2935
clean:
30-
rm -f *.o warp_bitonic_sort smem_bitonic_sort cpu_bitonic_sort
36+
rm -f *.o warp_bitonic_sort smem_bitonic_sort cpu_bitonic_sort global_bitonic_sort

global_bitonic_sort.cu

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Global Memory Bitonic Sort
3+
*
4+
* This uses GPU global memory to sort long arrays of ints
5+
*
6+
* Author: Andrew Boessen
7+
*/
8+
9+
#include "bitonic_sort.cuh"
10+
11+
/**
 * Global Memory Bitonic Sort Swap
 *
 * One compare-and-exchange pass of the bitonic network. Every thread derives
 * its element index from its position in a 1D grid, finds its partner element
 * and, if it owns the pair, orders the two values in place.
 *
 * @param i current large step in bitonic sort sequence (selects the direction
 *          bit of the element index)
 * @param j current small step in sequence (partner distance is 1 << (i - j))
 * @param arr array being sorted in place; dereferenced on the device
 *
 * @note There is no bounds check: every launched thread indexes arr with its
 *       global thread id and its partner id, so the launch must not contain
 *       more threads than arr has elements.
 */
__global__ void globalSwap(int i, int j, int *arr) {
  // element handled by this thread (1D grid, 1D blocks)
  const int self = blockIdx.x * blockDim.x + threadIdx.x;

  // partner element in the butterfly diagram: flip bit (i - j) of the index
  const int partner = self ^ (1 << (i - j));

  // each pair is handled once, by the thread with the lower index
  if (partner <= self) {
    return;
  }

  // bit i of the index selects the direction of this pair's subsequence
  const bool largerFirst = (self & (1 << i)) != 0;

  const int lowVal = arr[self];
  const int highVal = arr[partner];

  // largerFirst: the bigger value belongs at the lower index (descending run)
  // otherwise:   the smaller value belongs at the lower index (ascending run)
  const bool misplaced = largerFirst ? (lowVal < highVal) : (lowVal > highVal);

  if (misplaced) {
    arr[self] = highVal;
    arr[partner] = lowVal;
  }
}
53+
54+
/**
 * Global Memory Bitonic Sort
 *
 * Drives the bitonic sorting network from the host: for every stage i
 * (subsequence length 1 << i, up to size) it launches one globalSwap pass per
 * step j = 1..i. Each pass is a separate kernel launch with no explicit
 * stream, so the passes are issued in order on the default stream.
 *
 * @param arr Pointer to the array of integers to be sorted (passed straight
 *            to the kernel, so it must be dereferenceable on the device)
 * @param size Total number of elements in the array
 * @param block_size Number of threads in one block
 * @param num_blocks Number of total block in grid
 *
 * @note This function assumes that the number elements in the arrays is a power
 * of two
 * @note The launches are not synchronized or error-checked here; the function
 *       returns as soon as all passes have been queued.
 *
 * @see globalSwap() for the element comparison and swapping logic kernel
 */
void globalBitonicSort(int *arr, int size, int block_size,
                       int num_blocks) {
  // Stage i is needed while 2^i <= size. Test this as (size >> i) > 0 rather
  // than (1 << i) <= size: the left shift overflows int once i reaches 31,
  // which the old condition hit for size >= 2^30. Stage 0 has no steps, so
  // start at 1.
  for (int i = 1; (size >> i) > 0; i++) {
    // steps within a stage: partner distance shrinks from 2^(i-1) down to 1
    for (int j = 1; j <= i; j++) {
      globalSwap<<<num_blocks, block_size>>>(i, j, arr);
    }
  }
}
75+
/**
 * Launch Global Memory Bitonic Sort
 *
 * Picks the launch configuration (one thread per element) and runs
 * globalBitonicSort() on the array.
 *
 * @param arr Pointer to the array of integers to be sorted; it is forwarded
 *            unchanged to the kernel launches
 * @param size Total number of elements in the array
 *
 * @note The number of elements is assumed to be a power of two. With that
 *       assumption the grid covers exactly size threads, which matters
 *       because globalSwap() performs no bounds check.
 */
void launchBitonicSort(int *arr, int size) {
  // nothing to do for a missing array or fewer than two elements
  if (arr == nullptr || size < 2) {
    return;
  }

  const int MAX_BLOCK_SIZE = 512;
  // Never launch more threads per block than there are elements: a fixed
  // 512-thread block on a smaller array would index past the end of arr.
  const int BLOCK_SIZE = size < MAX_BLOCK_SIZE ? size : MAX_BLOCK_SIZE;
  // ceil(size / BLOCK_SIZE) without the size + BLOCK_SIZE - 1 overflow risk
  const int NUM_BLOCKS = size / BLOCK_SIZE + (size % BLOCK_SIZE != 0 ? 1 : 0);

  // call sort function
  globalBitonicSort(arr, size, BLOCK_SIZE, NUM_BLOCKS);
}

0 commit comments

Comments
 (0)