Skip to content

Commit 767ad98

Browse files
committed
aobench: added more tunable parameters
1 parent 28f1b1c commit 767ad98

File tree

6 files changed

+44
-36
lines changed

6 files changed

+44
-36
lines changed

samples/aobench/ao.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -203,13 +203,13 @@ FUNCTION static unsigned char aobench_clamp(Scalar f)
203203
return (unsigned char) s;
204204
}
205205

206-
FUNCTION EXTERNAL_FN Ctx get_init_context() {
206+
EXTERNAL_FN Ctx get_init_context() {
207207
return (Ctx) {
208208
.rng = 0xFEEFDEED,
209209
};
210210
}
211211

212-
FUNCTION EXTERNAL_FN void render_pixel(Ctx* ctx, int x, int y, int w, int h, int nsubsamples, unsigned char* img) {
212+
EXTERNAL_FN void render_pixel(Ctx* ctx, int x, int y, int w, int h, int nsubsamples, TEXEL_T* img) {
213213
Scalar pixel[3] = { 0, 0, 0 };
214214

215215
ctx->rng = x * w + y;
@@ -269,7 +269,7 @@ FUNCTION EXTERNAL_FN void render_pixel(Ctx* ctx, int x, int y, int w, int h, int
269269
img[3 * (y * w + x) + 2] = aobench_clamp(pixel[2]);
270270
}
271271

272-
FUNCTION EXTERNAL_FN void init_scene(Ctx* ctx)
272+
EXTERNAL_FN void init_scene(Ctx* ctx)
273273
{
274274
ctx->spheres[0].center.x = -2.0f;
275275
ctx->spheres[0].center.y = 0.0f;
@@ -294,4 +294,4 @@ FUNCTION EXTERNAL_FN void init_scene(Ctx* ctx)
294294
ctx->plane.n.y = 1.0f;
295295
ctx->plane.n.z = 0.0f;
296296

297-
}
297+
}

samples/aobench/ao.comp.cpp

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -32,27 +32,21 @@ float cosf(float) __asm__("shady::prim_op::cos");
3232
float fmodf(float, float) __asm__("shady::prim_op::mod");
3333
float fabsf(float) __asm__("shady::prim_op::abs");
3434
float floorf(float) __asm__("shady::prim_op::floor");
35+
36+
#define EXTERNAL_FN static
37+
#define FUNCTION static
38+
3539
#include "ao.c"
3640

37-
extern "C" __attribute__((annotate("shady::workgroup_size::16::16::1")))
38-
compute_shader void aobench_kernel(global unsigned char* out) {
39-
//outColor = (vec4) { fragColor[0], fragColor[1], fragColor[2], 1.0f };
40-
//outColor = (vec4) { fragCoord[0] / 1024, fragCoord[1] / 1024, 1.0f, 1.0f };
41+
#define xstr(s) str(s)
42+
#define str(s) #s
4143

44+
extern "C" __attribute__((annotate("shady::workgroup_size::" xstr(BLOCK_SIZE) "::" xstr(BLOCK_SIZE) "::1")))
45+
compute_shader void aobench_kernel(global TEXEL_T* out) {
4246
Ctx ctx = get_init_context();
4347
init_scene(&ctx);
4448

4549
int x = global_id.x;
4650
int y = global_id.y;
47-
//int x = (int) fragCoord.x % 1024;
48-
//int y = (int) fragCoord.y % 1024;
49-
50-
// unsigned int out[3]; // = { 55, 0, 0};
51-
out[0] = 255;
52-
out[1] = 255;
53-
render_pixel(&ctx, x + 3, y, WIDTH, HEIGHT, NSUBSAMPLES, (unsigned char*) out);
54-
//out[2] = 155;
55-
// out[0] = x / 4;
56-
// out[1] = y / 4;
57-
//outColor = (vec4) { ((int) out[0]) / 255.0f, ((int) out[1]) / 255.0f, ((int) out[2]) / 255.0f, 1.0f };
58-
}
51+
render_pixel(&ctx, x, y, WIDTH, HEIGHT, NSUBSAMPLES, (TEXEL_T*) out);
52+
}

samples/aobench/ao.cu

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,20 @@
1-
#define FUNCTION __device__
1+
#define EXTERNAL_FN static inline __device__ __attribute__((always_inline))
2+
#define FUNCTION static inline __device__ __attribute__((always_inline))
3+
24
#include "ao.c"
35

46
extern "C" {
57

6-
__global__ void aobench_kernel(unsigned char* out) {
8+
__global__ void aobench_kernel(unsigned TEXEL_T* out) {
79
int x = threadIdx.x + blockDim.x * blockIdx.x;
810
int y = threadIdx.y + blockDim.y * blockIdx.y;
911

10-
long int ptr = (long int) out;
1112
Ctx ctx = get_init_context();
1213
init_scene(&ctx);
1314
render_pixel(&ctx, x, y, WIDTH, HEIGHT, NSUBSAMPLES, out);
15+
// out[3 * (y * 2048 + x) + 0] = 255;
16+
// out[3 * (y * 2048 + x) + 1] = 255;
17+
// out[3 * (y * 2048 + x) + 2] = 255;
1418
}
1519

1620
}

samples/aobench/ao.h

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
#define NSUBSAMPLES 1
44
#define NAO_SAMPLES 8
55
#define BLOCK_SIZE 16
6+
#define TEXEL_T unsigned char
67

78
typedef float Scalar;
89

@@ -64,10 +65,6 @@ typedef struct {
6465
unsigned int rng;
6566
} Ctx;
6667

67-
#ifndef EXTERNAL_FN
68-
#define EXTERNAL_FN static
69-
#endif
70-
7168
EXTERNAL_FN Ctx get_init_context();
7269
EXTERNAL_FN void init_scene(Ctx*);
73-
EXTERNAL_FN void render_pixel(Ctx*, int x, int y, int w, int h, int nsubsamples, unsigned char* img);
70+
EXTERNAL_FN void render_pixel(Ctx*, int x, int y, int w, int h, int nsubsamples, TEXEL_T* img);

samples/aobench/ao_host.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
#define private
22
#define EXTERNAL_FN /* not static */
3+
#define FUNCTION static
34
#include "ao.c"

samples/aobench/ao_main.c

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#define EXTERNAL_FN /* not static */
2+
23
#include "ao.h"
34
#include "../runtime/runtime_app_common.h"
45

@@ -24,7 +25,7 @@ static uint64_t timespec_to_nano(struct timespec t) {
2425
return t.tv_sec * 1000000000 + t.tv_nsec;
2526
}
2627

27-
void saveppm(const char *fname, int w, int h, unsigned char *img) {
28+
void saveppm(const char *fname, int w, int h, TEXEL_T* img) {
2829
FILE *fp;
2930

3031
fp = fopen(fname, "wb");
@@ -33,11 +34,15 @@ void saveppm(const char *fname, int w, int h, unsigned char *img) {
3334
fprintf(fp, "P6\n");
3435
fprintf(fp, "%d %d\n", w, h);
3536
fprintf(fp, "255\n");
36-
fwrite(img, w * h * 3, 1, fp);
37+
// fwrite(img, w * h * 3, 1, fp);
38+
for (size_t i = 0; i < w * h * 3; i++) {
39+
unsigned char c = img[i];
40+
fwrite(&c, 1, 1, fp);
41+
}
3742
fclose(fp);
3843
}
3944

40-
void render_host(unsigned char *img, int w, int h, int nsubsamples) {
45+
void render_host(TEXEL_T* img, int w, int h, int nsubsamples) {
4146
int x, y;
4247
Scalar* fimg = (Scalar *)malloc(sizeof(Scalar) * w * h * 3);
4348
memset((void *)fimg, 0, sizeof(Scalar) * w * h * 3);
@@ -68,7 +73,7 @@ typedef struct {
6873

6974
extern Vec3u builtin_NumWorkgroups;
7075

71-
void render_ispc(unsigned char *img, int w, int h, int nsubsamples) {
76+
void render_ispc(TEXEL_T* img, int w, int h, int nsubsamples) {
7277
struct timespec ts;
7378
timespec_get(&ts, TIME_UTC);
7479
uint64_t tsn = timespec_to_nano(ts);
@@ -94,7 +99,7 @@ void render_ispc(unsigned char *img, int w, int h, int nsubsamples) {
9499
}
95100
#endif
96101

97-
void render_device(Args* args, unsigned char *img, int w, int h, int nsubsamples, String path) {
102+
void render_device(Args* args, TEXEL_T *img, int w, int h, int nsubsamples, String path, bool import_memory) {
98103
for (size_t i = 0; i < WIDTH; i++) {
99104
for (size_t j = 0; j < HEIGHT; j++) {
100105
img[j * WIDTH * 3 + i * 3 + 0] = 255;
@@ -112,7 +117,12 @@ void render_device(Args* args, unsigned char *img, int w, int h, int nsubsamples
112117
img[0] = 69;
113118
info_print("malloc'd address is: %zu\n", (size_t) img);
114119

115-
Buffer* buf = import_buffer_host(device, img, sizeof(uint8_t) * WIDTH * HEIGHT * 3);
120+
Buffer* buf;
121+
if (import_memory)
122+
buf = import_buffer_host(device, img, sizeof(*img) * WIDTH * HEIGHT * 3);
123+
else
124+
buf = allocate_buffer_device(device, sizeof(*img) * WIDTH * HEIGHT * 3);
125+
116126
uint64_t buf_addr = get_buffer_device_pointer(buf);
117127

118128
info_print("Device-side address is: %zu\n", buf_addr);
@@ -132,6 +142,8 @@ void render_device(Args* args, unsigned char *img, int w, int h, int nsubsamples
132142

133143
debug_print("data %d\n", (int) img[0]);
134144

145+
if (!import_memory)
146+
copy_from_buffer(buf, 0, img, sizeof(*img) * WIDTH * HEIGHT * 3);
135147
destroy_buffer(buf);
136148

137149
shutdown_runtime(runtime);
@@ -170,7 +182,7 @@ int main(int argc, char **argv) {
170182
}
171183
}
172184

173-
unsigned char *img = (unsigned char *)malloc(WIDTH * HEIGHT * 3);
185+
void *img = malloc(WIDTH * HEIGHT * 3 * sizeof(TEXEL_T));
174186

175187
if (do_host || do_all) {
176188
render_host(img, WIDTH, HEIGHT, NSUBSAMPLES);
@@ -185,7 +197,7 @@ int main(int argc, char **argv) {
185197
#endif
186198

187199
if (do_device || do_all) {
188-
render_device(&args, img, WIDTH, HEIGHT, NSUBSAMPLES, "./ao.comp.c.ll");
200+
render_device(&args, img, WIDTH, HEIGHT, NSUBSAMPLES, "./ao.comp.c.ll", false);
189201
saveppm("device.ppm", WIDTH, HEIGHT, img);
190202
}
191203

0 commit comments

Comments
 (0)