@@ -47,21 +47,26 @@ static int cuda_records(gpudata *, int, CUstream);
4747static int detect_arch (const char * prefix , char * ret , error * e );
4848static gpudata * new_gpudata (cuda_context * ctx , CUdeviceptr ptr , size_t size );
4949
50- typedef struct _kernel_key {
50+ typedef struct _disk_key {
5151 uint8_t version ;
5252 uint8_t debug ;
5353 uint8_t major ;
5454 uint8_t minor ;
5555 uint32_t reserved ;
5656 char bin_id [64 ];
5757 strb src ;
58+ } disk_key ;
59+
60+ typedef struct _kernel_key {
61+ const char * fname ;
62+ strb src ;
5863} kernel_key ;
5964
/*
 * Number of leading bytes of a disk_key that may be copied, compared or
 * hashed as one flat chunk: the struct size minus the size of its `src`
 * member.
 */
#define DISK_KEY_MM (sizeof(disk_key) - sizeof(((disk_key *)0)->src))
6267
63- static void key_free (cache_key_t _k ) {
64- kernel_key * k = (kernel_key * )_k ;
68+ static void disk_free (cache_key_t _k ) {
69+ disk_key * k = (disk_key * )_k ;
6570 strb_clear (& k -> src );
6671 free (k );
6772}
@@ -71,45 +76,60 @@ static int strb_eq(strb *k1, strb *k2) {
7176 memcmp (k1 -> s , k2 -> s , k1 -> l ) == 0 );
7277}
7378
74- static uint32_t strb_hash (strb * k ) {
75- return XXH32 (k -> s , k -> l , 42 );
79+ static int kernel_eq (kernel_key * k1 , kernel_key * k2 ) {
80+ return (strcmp (k1 -> fname , k2 -> fname ) == 0 &&
81+ strb_eq (& k1 -> src , & k2 -> src ));
7682}
7783
78- static int key_eq (kernel_key * k1 , kernel_key * k2 ) {
79- return (memcmp (k1 , k2 , KERNEL_KEY_MM ) == 0 &&
84+ static uint32_t kernel_hash (kernel_key * k ) {
85+ XXH32_state_t state ;
86+ XXH32_reset (& state , 42 );
87+ XXH32_update (& state , k -> fname , strlen (k -> fname ));
88+ XXH32_update (& state , k -> src .s , k -> src .l );
89+ return XXH32_digest (& state );
90+ }
91+
92+ static void kernel_free (kernel_key * k ) {
93+ free ((void * )k -> fname );
94+ strb_clear (& k -> src );
95+ free (k );
96+ }
97+
98+ static int disk_eq (disk_key * k1 , disk_key * k2 ) {
99+ return (memcmp (k1 , k2 , DISK_KEY_MM ) == 0 &&
80100 strb_eq (& k1 -> src , & k2 -> src ));
81101}
82102
83- static int key_hash ( kernel_key * k ) {
103+ static int disk_hash ( disk_key * k ) {
84104 XXH32_state_t state ;
85105 XXH32_reset (& state , 42 );
86- XXH32_update (& state , k , KERNEL_KEY_MM );
106+ XXH32_update (& state , k , DISK_KEY_MM );
87107 XXH32_update (& state , k -> src .s , k -> src .l );
88108 return XXH32_digest (& state );
89109}
90110
91- static int key_write (strb * res , kernel_key * k ) {
92- strb_appendn (res , (const char * )k , KERNEL_KEY_MM );
111+ static int disk_write (strb * res , disk_key * k ) {
112+ strb_appendn (res , (const char * )k , DISK_KEY_MM );
93113 strb_appendb (res , & k -> src );
94114 return strb_error (res );
95115}
96116
97- static kernel_key * key_read (const strb * b ) {
98- kernel_key * k ;
99- if (b -> l < KERNEL_KEY_MM ) return NULL ;
117+ static disk_key * disk_read (const strb * b ) {
118+ disk_key * k ;
119+ if (b -> l < DISK_KEY_MM ) return NULL ;
100120 k = calloc (1 , sizeof (* k ));
101121 if (k == NULL ) return NULL ;
102- memcpy (k , b -> s , KERNEL_KEY_MM );
122+ memcpy (k , b -> s , DISK_KEY_MM );
103123 if (k -> version != 0 ) {
104124 free (k );
105125 return NULL ;
106126 }
107- if (strb_ensure (& k -> src , b -> l - KERNEL_KEY_MM ) != 0 ) {
127+ if (strb_ensure (& k -> src , b -> l - DISK_KEY_MM ) != 0 ) {
108128 strb_clear (& k -> src );
109129 free (k );
110130 return NULL ;
111131 }
112- strb_appendn (& k -> src , b -> s + KERNEL_KEY_MM , b -> l - KERNEL_KEY_MM );
132+ strb_appendn (& k -> src , b -> s + DISK_KEY_MM , b -> l - DISK_KEY_MM );
113133 return k ;
114134}
115135
@@ -238,9 +258,9 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
238258 }
239259
240260 res -> kernel_cache = cache_twoq (64 , 128 , 64 , 8 ,
241- (cache_eq_fn )strb_eq ,
242- (cache_hash_fn )strb_hash ,
243- (cache_freek_fn )strb_free ,
261+ (cache_eq_fn )kernel_eq ,
262+ (cache_hash_fn )kernel_hash ,
263+ (cache_freek_fn )kernel_free ,
244264 (cache_freev_fn )cuda_freekernel , global_err );
245265 if (res -> kernel_cache == NULL ) {
246266 error_cuda (global_err , "cuStreamCreate" , err );
@@ -250,9 +270,9 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
250270 cache_path = getenv ("GPUARRAY_CACHE_PATH" );
251271 if (cache_path != NULL ) {
252272 mem_cache = cache_lru (64 , 8 ,
253- (cache_eq_fn )key_eq ,
254- (cache_hash_fn )key_hash ,
255- (cache_freek_fn )key_free ,
273+ (cache_eq_fn )disk_eq ,
274+ (cache_hash_fn )disk_hash ,
275+ (cache_freek_fn )disk_free ,
256276 (cache_freev_fn )strb_free ,
257277 global_err );
258278 if (mem_cache == NULL ) {
@@ -261,11 +281,11 @@ cuda_context *cuda_make_ctx(CUcontext ctx, int flags) {
261281 goto fail_disk_cache ;
262282 }
263283 res -> disk_cache = cache_disk (cache_path , mem_cache ,
264- (kwrite_fn )key_write ,
284+ (kwrite_fn )disk_write ,
265285 (vwrite_fn )kernel_write ,
266- (kread_fn )key_read ,
286+ (kread_fn )disk_read ,
267287 (vread_fn )kernel_read ,
268- res -> err );
288+ global_err );
269289 if (res -> disk_cache == NULL ) {
270290 fprintf (stderr , "Error initializing disk cache, disabling: %s\n" ,
271291 global_err -> msg );
@@ -1190,8 +1210,8 @@ static int make_bin(cuda_context *ctx, const strb *ptx, strb *bin, strb *log) {
11901210static int compile (cuda_context * ctx , strb * src , strb * bin , strb * log ) {
11911211 strb ptx = STRB_STATIC_INIT ;
11921212 strb * cbin ;
1193- kernel_key k ;
1194- kernel_key * pk ;
1213+ disk_key k ;
1214+ disk_key * pk ;
11951215
11961216 memset (& k , 0 , sizeof (k ));
11971217 k .version = 0 ;
@@ -1217,36 +1237,36 @@ static int compile(cuda_context *ctx, strb *src, strb* bin, strb *log) {
12171237 GA_CHECK (make_bin (ctx , & ptx , bin , log ));
12181238
12191239 if (ctx -> disk_cache ) {
1220- pk = calloc (sizeof (kernel_key ), 1 );
1240+ pk = calloc (sizeof (disk_key ), 1 );
12211241 if (pk == NULL ) {
12221242 error_sys (ctx -> err , "calloc" );
12231243 fprintf (stderr , "Error adding kernel to disk cache: %s\n" ,
12241244 ctx -> err -> msg );
12251245 return GA_NO_ERROR ;
12261246 }
1227- memcpy (pk , & k , KERNEL_KEY_MM );
1247+ memcpy (pk , & k , DISK_KEY_MM );
12281248 strb_appendb (& pk -> src , src );
12291249 if (strb_error (& pk -> src )) {
12301250 error_sys (ctx -> err , "strb_appendb" );
12311251 fprintf (stderr , "Error adding kernel to disk cache %s\n" ,
12321252 ctx -> err -> msg );
1233- key_free ((cache_key_t )pk );
1253+ disk_free ((cache_key_t )pk );
12341254 return GA_NO_ERROR ;
12351255 }
12361256 cbin = strb_alloc (bin -> l );
12371257 if (cbin == NULL ) {
12381258 error_sys (ctx -> err , "strb_alloc" );
12391259 fprintf (stderr , "Error adding kernel to disk cache: %s\n" ,
12401260 ctx -> err -> msg );
1241- key_free ((cache_key_t )pk );
1261+ disk_free ((cache_key_t )pk );
12421262 return GA_NO_ERROR ;
12431263 }
12441264 strb_appendb (cbin , bin );
12451265 if (strb_error (cbin )) {
12461266 error_sys (ctx -> err , "strb_appendb" );
12471267 fprintf (stderr , "Error adding kernel to disk cache %s\n" ,
12481268 ctx -> err -> msg );
1249- key_free ((cache_key_t )pk );
1269+ disk_free ((cache_key_t )pk );
12501270 strb_free (cbin );
12511271 return GA_NO_ERROR ;
12521272 }
@@ -1284,8 +1304,9 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
12841304 strb src = STRB_STATIC_INIT ;
12851305 strb bin = STRB_STATIC_INIT ;
12861306 strb log = STRB_STATIC_INIT ;
1287- strb * psrc ;
12881307 gpukernel * res ;
1308+ kernel_key k_key ;
1309+ kernel_key * p_key ;
12891310 CUdevice dev ;
12901311 CUresult err ;
12911312 unsigned int i ;
@@ -1350,7 +1371,10 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
13501371 return error_sys (ctx -> err , "strb" );
13511372 }
13521373
1353- res = (gpukernel * )cache_get (ctx -> kernel_cache , & src );
1374+ k_key .fname = fname ;
1375+ k_key .src = src ;
1376+
1377+ res = (gpukernel * )cache_get (ctx -> kernel_cache , & k_key );
13541378 if (res != NULL ) {
13551379 res -> refcnt ++ ;
13561380 strb_clear (& src );
@@ -1434,13 +1458,19 @@ static int cuda_newkernel(gpukernel **k, gpucontext *c, unsigned int count,
14341458 ctx -> refcnt ++ ;
14351459 cuda_exit (ctx );
14361460 TAG_KER (res );
1437- psrc = memdup (& src , sizeof (strb ));
1438- if (psrc != NULL ) {
1439- /* One of the refs is for the cache */
1440- res -> refcnt ++ ;
1441- /* If this fails, it will free the key and remove a ref from the
1442- kernel. */
1443- cache_add (ctx -> kernel_cache , psrc , res );
1461+ p_key = memdup (& k_key , sizeof (kernel_key ));
1462+ if (p_key != NULL ) {
1463+ p_key -> fname = strdup (fname );
1464+ if (p_key -> fname != NULL ) {
1465+ /* One of the refs is for the cache */
1466+ res -> refcnt ++ ;
1467+ /* If this fails, it will free the key and remove a ref from the
1468+ kernel. */
1469+ cache_add (ctx -> kernel_cache , p_key , res );
1470+ } else {
1471+ free (p_key );
1472+ strb_clear (& src );
1473+ }
14441474 } else {
14451475 strb_clear (& src );
14461476 }
0 commit comments