Skip to content

Commit 6639342

Browse files
authored
Added micro-benchmark for groupby summation (#1093)
1 parent 3aba566 commit 6639342

File tree

3 files changed

+138
-0
lines changed

3 files changed

+138
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ temp/
4545
microbench/sort/sort
4646
microbench/writecsv/writecsv
4747
microbench/fread/fread
48+
microbench/groupby/groupby
4849

4950

5051
# Auto-generated files

microbench/groupby/Makefile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
# Build script for the groupby micro-benchmark.
#
# Targets:
#   build (default) - compile and link the `groupby` executable
#   clean           - remove object files and the executable
#   debug           - clean rebuild with -ggdb added to the compile/link flags
CC = clang++
# Parent directory is on the include path (presumably where utils.h lives).
INCLUDES ?= -I..
# `+=` so that flags supplied through the environment (see `debug`) are kept.
CCFLAGS += -std=c++11 -O3
LDFLAGS += -lc++

# These targets never create a file of the same name; declaring them phony
# keeps them working even if such a file appears in this directory.
.PHONY: build clean debug

build: groupby

groupby: groupby.o
	$(CC) $(LDFLAGS) -o $@ $+ $(LIBRARIES)

groupby.o: groupby.cc
	$(CC) $(CCFLAGS) $(INCLUDES) -o $@ -c $<

clean:
	rm -f *.o groupby

debug:
	$(MAKE) clean
	CCFLAGS=-ggdb LDFLAGS=-ggdb \
	$(MAKE) build

microbench/groupby/groupby.cc

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#include <cstdio> // std::printf
2+
#include <cstdlib> // std::rand, std::srand
3+
#include <ctime> // std::time
4+
#include "utils.h"
5+
6+
7+
// Benchmark data, filled in by prepare_data() and read by the summation
// methods:
//   x - N input values; each value also serves as its row's group id
//   o - ordering vector: row indices arranged so that x[o[i]] is non-decreasing
//   g - K + 1 group offsets: the rows of group j are o[g[j]] .. o[g[j+1] - 1]
int32_t* x = nullptr;
int32_t* o = nullptr;
int32_t* g = nullptr;


/**
 * Generate the benchmark input and its grouping information.
 *
 * Fills `x` with N pseudo-random values in the range [0, K), then builds the
 * ordering vector `o` and the group offsets `g` with a counting sort, and
 * finally verifies the result.
 *
 * @param N     number of rows to generate
 * @param K     number of groups (must be positive: it is used as a modulus)
 * @param seed  seed passed to std::srand
 *
 * Terminates the process with exit code 2 if the offsets do not add up to N,
 * or with exit code 1 if `o` does not order `x` non-decreasingly.
 */
void prepare_data(int N, int K, int seed) {
  std::srand(seed);
  // Initialize array x
  x = new int32_t[N];
  for (int i = 0; i < N; ++i) {
    x[i] = std::rand() % K;
  }
  // Create ordering+grouping vectors
  o = new int32_t[N];
  // Value-initialized: the counting pass below increments these slots, so
  // they must start at zero.
  g = new int32_t[K + 1]();
  for (int i = 0; i < N; ++i) g[x[i]]++;
  // Exclusive prefix sum: turn per-group counts into group start offsets.
  int t = 0;
  for (int i = 0; i <= K; ++i) {
    int c = g[i];
    g[i] = t;
    t += c;
  }
  // Scatter row indices into their group's slice, using g[j] as the write
  // cursor of group j.
  for (int i = 0; i < N; ++i) o[g[x[i]]++] = i;
  // The scatter advanced every g[j] to the END of group j (i.e. the start of
  // group j + 1). Shift the offsets back by one slot so that g[j] is again
  // the start of group j and g[K] == N.
  for (int j = K; j > 0; --j) g[j] = g[j - 1];
  g[0] = 0;
  // Check ordering vector
  if (g[K] != N) exit(2);
  for (int i = 1; i < N; ++i) {
    if (x[o[i]] < x[o[i-1]]) {
      std::printf("Incorrect ordering at index %d: x[%d] = %d, and x[%d] = %d\n",
                  i, o[i], x[o[i]], o[i-1], x[o[i-1]]);
      exit(1);
    }
  }
}
40+
41+
42+
int64_t* method1(int N, int K) {
43+
int64_t* res = new int64_t[K];
44+
for (int j = 0; j < K; ++j) {
45+
int i0 = g[j];
46+
int i1 = g[j + 1];
47+
int64_t sum = 0;
48+
for (int i = i0; i < i1; ++i) {
49+
int val = x[o[i]];
50+
sum += val;
51+
}
52+
res[j] = sum;
53+
}
54+
return res;
55+
}
56+
57+
int64_t* method2(int N, int K) {
58+
// Compute group assignment vector
59+
int32_t* grass = new int32_t[N];
60+
for (int j = 0; j < K; ++j) {
61+
const int32_t* optr = o + g[j];
62+
const int32_t* oend = o + g[j + 1];
63+
while (optr < oend) grass[*optr++] = j;
64+
}
65+
// Compute gsum
66+
int64_t* res = new int64_t[K];
67+
for (int i = 0; i < N; ++i) {
68+
res[grass[i]] += x[i];
69+
}
70+
delete[] grass;
71+
return res;
72+
}
73+
74+
75+
76+
int main(int argc, char **argv)
77+
{
78+
// N - array size
79+
// K - number of groups
80+
int N = getCmdArgInt(argc, argv, "n", 10000000);
81+
int K = getCmdArgInt(argc, argv, "k", 1000);
82+
int S = getCmdArgInt(argc, argv, "seed", 0);
83+
if (S == 0) S = std::time(nullptr);
84+
std::printf("Array size n = %d\n", N);
85+
std::printf("Num groups k = %d\n", K);
86+
std::printf("Seed = %d\n", S);
87+
std::printf("\n");
88+
89+
std::printf("Generating data...");
90+
prepare_data(N, K, S);
91+
std::printf("ok.\n");
92+
93+
std::printf("Computing with method1 (simple): ");
94+
double t0 = now();
95+
int64_t* res1 = method1(N, K);
96+
double t1 = now();
97+
std::printf("time = %g ms\n", (t1 - t0) * 1000);
98+
99+
std::printf("Computing with method2 (gsum): ");
100+
double t2 = now();
101+
int64_t* res2 = method2(N, K);
102+
double t3 = now();
103+
std::printf("time = %g ms\n", (t3 - t2) * 1000);
104+
105+
std::printf("Comparing...");
106+
for (int i = 0; i < K; ++i) {
107+
if (res1[i] != res2[i]) {
108+
std::printf("Difference at index %d: %lld vs %lld\n", i, res1[i], res2[i]);
109+
exit(3);
110+
}
111+
}
112+
std::printf("ok.\n");
113+
114+
return 1;
115+
}

0 commit comments

Comments
 (0)