@@ -61,7 +61,9 @@ GPU_type gpu_types[]={
6161 {GPU_GCN5 , 64 , "GCN5" },
6262 {GPU_GCNF , 64 , "GCNF" },
6363 {GPU_RDNA , 64 , "RDNA" },
64+ {GPU_RDNA2 , 64 , "RDNA2" },
6465 {GPU_RDNA3 , 64 , "RDNA3" },
66+ {GPU_RDNA4 , 64 , "RDNA4" },
6567 {GPU_APU , 80 , "APU" },
6668 {GPU_CPU , 1 , "CPU" },
6769 {GPU_NVIDIA , 8 , "NVIDIA" },
@@ -289,7 +291,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
289291 UNKNOWN_KERNEL ,
290292 UNKNOWN_KERNEL },
291293 {
292- /* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only barrett tested) */
294+ /* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only Barrett kernels tested) */
293295 BARRETT69_MUL15 ,
294296 BARRETT70_MUL15 ,
295297 BARRETT71_MUL15 ,
@@ -311,7 +313,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
311313 UNKNOWN_KERNEL ,
312314 UNKNOWN_KERNEL },
313315 {
314- /* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only barrett tested) */
316+ /* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only Barrett kernels tested) */
315317 BARRETT69_MUL15 ,
316318 BARRETT70_MUL15 ,
317319 BARRETT71_MUL15 ,
@@ -333,7 +335,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
333335 UNKNOWN_KERNEL ,
334336 UNKNOWN_KERNEL },
335337 {
336- /* GPU_GCNF (Last GCN - Radeon VII) (only barrett tested) */
338+ /* GPU_GCNF (Last GCN - Radeon VII) (only Barrett kernels tested) */
337339 BARRETT76_MUL32 ,
338340 BARRETT77_MUL32 ,
339341 BARRETT87_MUL32 ,
@@ -354,30 +356,74 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
354356 MG88 ,
355357 UNKNOWN_KERNEL ,
356358 UNKNOWN_KERNEL },
357- {
358- /* GPU_RDNA (1st/2nd gen RDNA) (only barett tested) */
359+ {
360+ /* GPU_RDNA (1st gen RDNA) (does not like all 15-bit Barrett kernels) */
361+ BARRETT76_MUL32 ,
362+ BARRETT77_MUL32 ,
363+ BARRETT87_MUL32 ,
364+ BARRETT88_MUL32 ,
365+ BARRETT73_MUL15 ,
366+ BARRETT74_MUL15 ,
367+ BARRETT79_MUL32 ,
368+ BARRETT92_MUL32 ,
369+ MG62 ,
370+ _63BIT_MUL24 ,
371+ _71BIT_MUL24 ,
372+ MG88 ,
373+ UNKNOWN_KERNEL ,
374+ UNKNOWN_KERNEL ,
375+ UNKNOWN_KERNEL ,
376+ UNKNOWN_KERNEL ,
377+ UNKNOWN_KERNEL ,
378+ UNKNOWN_KERNEL ,
379+ UNKNOWN_KERNEL ,
380+ UNKNOWN_KERNEL },
381+ {
382+ /* GPU_RDNA2 (2nd gen RDNA) */
359383 BARRETT69_MUL15 ,
360384 BARRETT70_MUL15 ,
361385 BARRETT71_MUL15 ,
386+ BARRETT76_MUL32 ,
387+ BARRETT77_MUL32 ,
388+ BARRETT87_MUL32 ,
389+ BARRETT88_MUL32 ,
362390 BARRETT73_MUL15 ,
363391 BARRETT74_MUL15 ,
392+ BARRETT79_MUL32 ,
393+ BARRETT92_MUL32 ,
394+ MG62 ,
395+ _63BIT_MUL24 ,
396+ _71BIT_MUL24 ,
397+ MG88 ,
398+ UNKNOWN_KERNEL ,
399+ UNKNOWN_KERNEL ,
400+ UNKNOWN_KERNEL ,
401+ UNKNOWN_KERNEL ,
402+ UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
403+ {
404+ /* GPU_RDNA3 (3rd gen RDNA) (only Barrett kernels tested) */
405+ BARRETT69_MUL15 ,
406+ BARRETT70_MUL15 ,
407+ BARRETT71_MUL15 ,
364408 BARRETT76_MUL32 ,
365409 BARRETT77_MUL32 ,
366- BARRETT82_MUL15 ,
367- BARRETT83_MUL15 ,
368410 BARRETT87_MUL32 ,
369411 BARRETT88_MUL32 ,
412+ BARRETT73_MUL15 ,
413+ BARRETT74_MUL15 ,
370414 BARRETT79_MUL32 ,
371- BARRETT88_MUL15 ,
372415 BARRETT92_MUL32 ,
373416 MG62 ,
374417 _63BIT_MUL24 ,
375418 _71BIT_MUL24 ,
376419 MG88 ,
377420 UNKNOWN_KERNEL ,
378- UNKNOWN_KERNEL },
379- {
380- /* GPU_RDNA3 (3rd gen RDNA) (only barett tested) */
421+ UNKNOWN_KERNEL ,
422+ UNKNOWN_KERNEL ,
423+ UNKNOWN_KERNEL ,
424+ UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
425+ {
426+ /* GPU_RDNA4 (4th gen RDNA) (only Barrett kernels tested) */
381427 BARRETT69_MUL15 ,
382428 BARRETT70_MUL15 ,
383429 BARRETT71_MUL15 ,
@@ -398,7 +444,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
398444 UNKNOWN_KERNEL ,
399445 UNKNOWN_KERNEL ,
400446 UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
401- {
447+ {
402448/* GPU_APU, (BeaverCreek=???, v=4) */
403449 BARRETT70_MUL15 , // "cl_barrett15_70" (79.66 M/s)
404450 BARRETT69_MUL15 , // "cl_barrett15_69" (78.40 M/s)
@@ -421,7 +467,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
421467 UNKNOWN_KERNEL ,
422468 UNKNOWN_KERNEL ,
423469 UNKNOWN_KERNEL },
424- {
470+ {
425471/* GPU_CPU, i7 620M @ 3.06GHz */
426472 MG62 , // "cl_mg_62" (9.60 M/s)
427473 BARRETT77_MUL32 , // "cl_barrett32_77" (5.54 M/s)
@@ -433,7 +479,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
433479 BARRETT70_MUL15 , // "cl_barrett15_70" (3.60 M/s)
434480 BARRETT92_MUL32 , // "cl_barrett32_92" (3.56 M/s)
435481 BARRETT71_MUL15 , // "cl_barrett15_71" (3.43 M/s)
436- // BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
482+ // BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
437483 BARRETT73_MUL15 , // "cl_barrett15_73" (3.07 M/s)
438484 BARRETT74_MUL15 , // "cl_barrett15_74"
439485 BARRETT82_MUL15 , // "cl_barrett15_82" (2.72 M/s)
0 commit comments