Skip to content

Commit 4614985

Browse files
authored
Merge pull request #480 from abergeron/fix_kernel_cache
Fix kernel cache
2 parents 3cef2c9 + fee0445 commit 4614985

File tree

1 file changed

+74
-44
lines changed

1 file changed

+74
-44
lines changed

src/gpuarray_buffer_cuda.c

Lines changed: 74 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,26 @@ static int cuda_records(gpudata *, int, CUstream);
4747
static int detect_arch(const char *prefix, char *ret, error *e);
4848
static gpudata *new_gpudata(cuda_context *ctx, CUdeviceptr ptr, size_t size);
4949

50-
typedef struct _kernel_key {
50+
typedef struct _disk_key {
5151
uint8_t version;
5252
uint8_t debug;
5353
uint8_t major;
5454
uint8_t minor;
5555
uint32_t reserved;
5656
char bin_id[64];
5757
strb src;
58+
} disk_key;
59+
60+
typedef struct _kernel_key {
61+
const char *fname;
62+
strb src;
5863
} kernel_key;
5964

60-
/* Size of the kernel_key that we can memcopy to duplicate */
61-
#define KERNEL_KEY_MM (sizeof(kernel_key) - sizeof(strb))
65+
/* Size of the disk_key that we can memcopy to duplicate */
66+
#define DISK_KEY_MM (sizeof(disk_key) - sizeof(strb))
6267

63-
static void key_free(cache_key_t _k) {
64-
kernel_key *k = (kernel_key *)_k;
68+
static void disk_free(cache_key_t _k) {
69+
disk_key *k = (disk_key *)_k;
6570
strb_clear(&k->src);
6671
free(k);
6772
}
@@ -71,45 +76,60 @@ static int strb_eq(strb *k1, strb *k2) {
7176
memcmp(k1->s, k2->s, k1->l) == 0);
7277
}
7378

74-
static uint32_t strb_hash(strb *k) {
75-
return XXH32(k->s, k->l, 42);
79+
static int kernel_eq(kernel_key *k1, kernel_key *k2) {
80+
return (strcmp(k1->fname, k2->fname) == 0 &&
81+
strb_eq(&k1->src, &k2->src));
7682
}
7783

78-
static int key_eq(kernel_key *k1, kernel_key *k2) {
79-
return (memcmp(k1, k2, KERNEL_KEY_MM) == 0 &&
84+
static uint32_t kernel_hash(kernel_key *k) {
85+
XXH32_state_t state;
86+
XXH32_reset(&state, 42);
87+
XXH32_update(&state, k->fname, strlen(k->fname));
88+
XXH32_update(&state, k->src.s, k->src.l);
89+
return XXH32_digest(&state);
90+
}
91+
92+
static void kernel_free(kernel_key *k) {
93+
free((void *)k->fname);
94+
strb_clear(&k->src);
95+
free(k);
96+
}
97+
98+
static int disk_eq(disk_key *k1, disk_key *k2) {
99+
return (memcmp(k1, k2, DISK_KEY_MM) == 0 &&
80100
strb_eq(&k1->src, &k2->src));
81101
}
82102

83-
static int key_hash(kernel_key *k) {
103+
static int disk_hash(disk_key *k) {
84104
XXH32_state_t state;
85105
XXH32_reset(&state, 42);
86-
XXH32_update(&state, k, KERNEL_KEY_MM);
106+
XXH32_update(&state, k, DISK_KEY_MM);
87107
XXH32_update(&state, k->src.s, k->src.l);
88108
return XXH32_digest(&state);
89109
}
90110

91-
static int key_write(strb *res, kernel_key *k) {
92-
strb_appendn(res, (const char *)k, KERNEL_KEY_MM);
111+
static int disk_write(strb *res, disk_key *k) {
112+
strb_appendn(res, (const char *)k, DISK_KEY_MM);
93113
strb_appendb(res, &k->src);
94114
return strb_error(res);
95115
}
96116

97-
static kernel_key *key_read(const strb *b) {
98-
kernel_key *k;
99-
if (b->l < KERNEL_KEY_MM) return NULL;
117+
static disk_key *disk_read(const strb *b) {
118+
disk_key *k;
119+
if (b->l < DISK_KEY_MM) return NULL;
100120
k = calloc(1, sizeof(*k));
101121
if (k == NULL) return NULL;
102-
memcpy(k, b->s, KERNEL_KEY_MM);
122+
memcpy(k, b->s, DISK_KEY_MM);
103123
if (k->version != 0) {
104124
free(k);
105125
return NULL;
106126
}
107-
if (strb_ensure(&k->src, b->l - KERNEL_KEY_MM) != 0) {
127+
if (strb_ensure(&k->src, b->l - DISK_KEY_MM) != 0) {
108128
strb_clear(&k->src);
109129
free(k);
110130
return NULL;
111131
}
112-
strb_appendn(&k->src, b->s + KERNEL_KEY_MM, b->l - KERNEL_KEY_MM);
132+
strb_appendn(&k->src, b->s + DISK_KEY_MM, b->l - DISK_KEY_MM);
113133
return k;
114134
}
115135

@@ -238,9 +258,9 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
238258
}
239259

240260
res->kernel_cache = cache_twoq(64, 128, 64, 8,
241-
(cache_eq_fn)strb_eq,
242-
(cache_hash_fn)strb_hash,
243-
(cache_freek_fn)strb_free,
261+
(cache_eq_fn)kernel_eq,
262+
(cache_hash_fn)kernel_hash,
263+
(cache_freek_fn)kernel_free,
244264
(cache_freev_fn)cuda_freekernel, global_err);
245265
if (res->kernel_cache == NULL) {
246266
error_cuda(global_err, "cuStreamCreate", err);
@@ -250,9 +270,9 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
250270
cache_path = getenv("GPUARRAY_CACHE_PATH");
251271
if (cache_path != NULL) {
252272
mem_cache = cache_lru(64, 8,
253-
(cache_eq_fn)key_eq,
254-
(cache_hash_fn)key_hash,
255-
(cache_freek_fn)key_free,
273+
(cache_eq_fn)disk_eq,
274+
(cache_hash_fn)disk_hash,
275+
(cache_freek_fn)disk_free,
256276
(cache_freev_fn)strb_free,
257277
global_err);
258278
if (mem_cache == NULL) {
@@ -261,11 +281,11 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
261281
goto fail_disk_cache;
262282
}
263283
res->disk_cache = cache_disk(cache_path, mem_cache,
264-
(kwrite_fn)key_write,
284+
(kwrite_fn)disk_write,
265285
(vwrite_fn)kernel_write,
266-
(kread_fn)key_read,
286+
(kread_fn)disk_read,
267287
(vread_fn)kernel_read,
268-
res->err);
288+
global_err);
269289
if (res->disk_cache == NULL) {
270290
fprintf(stderr, "Error initializing disk cache, disabling: %s\n",
271291
global_err->msg);
@@ -1190,8 +1210,8 @@ static int make_bin(cuda_context *ctx, const strb *ptx, strb *bin, strb *log) {
11901210
static int compile(cuda_context *ctx, strb *src, strb* bin, strb *log) {
11911211
strb ptx = STRB_STATIC_INIT;
11921212
strb *cbin;
1193-
kernel_key k;
1194-
kernel_key *pk;
1213+
disk_key k;
1214+
disk_key *pk;
11951215

11961216
memset(&k, 0, sizeof(k));
11971217
k.version = 0;
@@ -1217,36 +1237,36 @@ static int compile(cuda_context *ctx, strb *src, strb* bin, strb *log) {
12171237
GA_CHECK(make_bin(ctx, &ptx, bin, log));
12181238

12191239
if (ctx->disk_cache) {
1220-
pk = calloc(sizeof(kernel_key), 1);
1240+
pk = calloc(sizeof(disk_key), 1);
12211241
if (pk == NULL) {
12221242
error_sys(ctx->err, "calloc");
12231243
fprintf(stderr, "Error adding kernel to disk cache: %s\n",
12241244
ctx->err->msg);
12251245
return GA_NO_ERROR;
12261246
}
1227-
memcpy(pk, &k, KERNEL_KEY_MM);
1247+
memcpy(pk, &k, DISK_KEY_MM);
12281248
strb_appendb(&pk->src, src);
12291249
if (strb_error(&pk->src)) {
12301250
error_sys(ctx->err, "strb_appendb");
12311251
fprintf(stderr, "Error adding kernel to disk cache %s\n",
12321252
ctx->err->msg);
1233-
key_free((cache_key_t)pk);
1253+
disk_free((cache_key_t)pk);
12341254
return GA_NO_ERROR;
12351255
}
12361256
cbin = strb_alloc(bin->l);
12371257
if (cbin == NULL) {
12381258
error_sys(ctx->err, "strb_alloc");
12391259
fprintf(stderr, "Error adding kernel to disk cache: %s\n",
12401260
ctx->err->msg);
1241-
key_free((cache_key_t)pk);
1261+
disk_free((cache_key_t)pk);
12421262
return GA_NO_ERROR;
12431263
}
12441264
strb_appendb(cbin, bin);
12451265
if (strb_error(cbin)) {
12461266
error_sys(ctx->err, "strb_appendb");
12471267
fprintf(stderr, "Error adding kernel to disk cache %s\n",
12481268
ctx->err->msg);
1249-
key_free((cache_key_t)pk);
1269+
disk_free((cache_key_t)pk);
12501270
strb_free(cbin);
12511271
return GA_NO_ERROR;
12521272
}
@@ -1284,8 +1304,9 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
12841304
strb src = STRB_STATIC_INIT;
12851305
strb bin = STRB_STATIC_INIT;
12861306
strb log = STRB_STATIC_INIT;
1287-
strb *psrc;
12881307
gpukernel *res;
1308+
kernel_key k_key;
1309+
kernel_key *p_key;
12891310
CUdevice dev;
12901311
CUresult err;
12911312
unsigned int i;
@@ -1350,7 +1371,10 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
13501371
return error_sys(ctx->err, "strb");
13511372
}
13521373

1353-
res = (gpukernel *)cache_get(ctx->kernel_cache, &src);
1374+
k_key.fname = fname;
1375+
k_key.src = src;
1376+
1377+
res = (gpukernel *)cache_get(ctx->kernel_cache, &k_key);
13541378
if (res != NULL) {
13551379
res->refcnt++;
13561380
strb_clear(&src);
@@ -1434,13 +1458,19 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
14341458
ctx->refcnt++;
14351459
cuda_exit(ctx);
14361460
TAG_KER(res);
1437-
psrc = memdup(&src, sizeof(strb));
1438-
if (psrc != NULL) {
1439-
/* One of the refs is for the cache */
1440-
res->refcnt++;
1441-
/* If this fails, it will free the key and remove a ref from the
1442-
kernel. */
1443-
cache_add(ctx->kernel_cache, psrc, res);
1461+
p_key = memdup(&k_key, sizeof(kernel_key));
1462+
if (p_key != NULL) {
1463+
p_key->fname = strdup(fname);
1464+
if (p_key->fname != NULL) {
1465+
/* One of the refs is for the cache */
1466+
res->refcnt++;
1467+
/* If this fails, it will free the key and remove a ref from the
1468+
kernel. */
1469+
cache_add(ctx->kernel_cache, p_key, res);
1470+
} else {
1471+
free(p_key);
1472+
strb_clear(&src);
1473+
}
14441474
} else {
14451475
strb_clear(&src);
14461476
}

0 commit comments

Comments
 (0)