Skip to content

Commit f081482

Browse files
authored
Merge pull request #57 from JamesHeinrich/master
SRBase fixes for RX 6xxx and 7xxx
2 parents a12cef0 + b47a478 commit f081482

File tree

4 files changed

+74
-16
lines changed

4 files changed

+74
-16
lines changed

README.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ Submitting results:
367367

368368
- the '-d c' option fails for some CPUs; this is under investigation
369369

370+
- certain 15-bit Barrett kernels are incompatible with RDNA 2 and RDNA 3 GPUs,
371+
and need to be ported to 32 bits
372+
370373
- some have reported mfakto does not work on certain Nvidia hardware; this is
371374
also being investigated
372375

src/mfaktc.c

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ GPU_type gpu_types[]={
6161
{GPU_GCN5, 64, "GCN5"},
6262
{GPU_GCNF, 64, "GCNF"},
6363
{GPU_RDNA, 64, "RDNA"},
64+
{GPU_RDNA2, 64, "RDNA2"},
6465
{GPU_RDNA3, 64, "RDNA3"},
66+
{GPU_RDNA4, 64, "RDNA4"},
6567
{GPU_APU, 80, "APU"},
6668
{GPU_CPU, 1, "CPU"},
6769
{GPU_NVIDIA, 8, "NVIDIA"},
@@ -289,7 +291,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
289291
UNKNOWN_KERNEL,
290292
UNKNOWN_KERNEL },
291293
{
292-
/* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only barrett tested) */
294+
/* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only Barrett kernels tested) */
293295
BARRETT69_MUL15,
294296
BARRETT70_MUL15,
295297
BARRETT71_MUL15,
@@ -311,7 +313,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
311313
UNKNOWN_KERNEL,
312314
UNKNOWN_KERNEL },
313315
{
314-
/* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only barrett tested) */
316+
/* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only Barrett kernels tested) */
315317
BARRETT69_MUL15,
316318
BARRETT70_MUL15,
317319
BARRETT71_MUL15,
@@ -333,7 +335,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
333335
UNKNOWN_KERNEL,
334336
UNKNOWN_KERNEL },
335337
{
336-
/* GPU_GCNF (Last GCN - Radeon VII) (only barrett tested) */
338+
/* GPU_GCNF (Last GCN - Radeon VII) (only Barrett kernels tested) */
337339
BARRETT76_MUL32,
338340
BARRETT77_MUL32,
339341
BARRETT87_MUL32,
@@ -354,30 +356,74 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
354356
MG88,
355357
UNKNOWN_KERNEL,
356358
UNKNOWN_KERNEL },
357-
{
358-
/* GPU_RDNA (1st/2nd gen RDNA) (only barett tested) */
359+
{
360+
/* GPU_RDNA (1st gen RDNA) (not compatible with every 15-bit Barrett kernel) */
361+
BARRETT76_MUL32,
362+
BARRETT77_MUL32,
363+
BARRETT87_MUL32,
364+
BARRETT88_MUL32,
365+
BARRETT73_MUL15,
366+
BARRETT74_MUL15,
367+
BARRETT79_MUL32,
368+
BARRETT92_MUL32,
369+
MG62,
370+
_63BIT_MUL24,
371+
_71BIT_MUL24,
372+
MG88,
373+
UNKNOWN_KERNEL,
374+
UNKNOWN_KERNEL,
375+
UNKNOWN_KERNEL,
376+
UNKNOWN_KERNEL,
377+
UNKNOWN_KERNEL,
378+
UNKNOWN_KERNEL,
379+
UNKNOWN_KERNEL,
380+
UNKNOWN_KERNEL },
381+
{
382+
/* GPU_RDNA2 (2nd gen RDNA) */
359383
BARRETT69_MUL15,
360384
BARRETT70_MUL15,
361385
BARRETT71_MUL15,
386+
BARRETT76_MUL32,
387+
BARRETT77_MUL32,
388+
BARRETT87_MUL32,
389+
BARRETT88_MUL32,
362390
BARRETT73_MUL15,
363391
BARRETT74_MUL15,
392+
BARRETT79_MUL32,
393+
BARRETT92_MUL32,
394+
MG62,
395+
_63BIT_MUL24,
396+
_71BIT_MUL24,
397+
MG88,
398+
UNKNOWN_KERNEL,
399+
UNKNOWN_KERNEL,
400+
UNKNOWN_KERNEL,
401+
UNKNOWN_KERNEL,
402+
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
403+
{
404+
/* GPU_RDNA3 (3rd gen RDNA) (only Barrett kernels tested) */
405+
BARRETT69_MUL15,
406+
BARRETT70_MUL15,
407+
BARRETT71_MUL15,
364408
BARRETT76_MUL32,
365409
BARRETT77_MUL32,
366-
BARRETT82_MUL15,
367-
BARRETT83_MUL15,
368410
BARRETT87_MUL32,
369411
BARRETT88_MUL32,
412+
BARRETT73_MUL15,
413+
BARRETT74_MUL15,
370414
BARRETT79_MUL32,
371-
BARRETT88_MUL15,
372415
BARRETT92_MUL32,
373416
MG62,
374417
_63BIT_MUL24,
375418
_71BIT_MUL24,
376419
MG88,
377420
UNKNOWN_KERNEL,
378-
UNKNOWN_KERNEL },
379-
{
380-
/* GPU_RDNA3 (3rd gen RDNA) (only barett tested) */
421+
UNKNOWN_KERNEL,
422+
UNKNOWN_KERNEL,
423+
UNKNOWN_KERNEL,
424+
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
425+
{
426+
/* GPU_RDNA4 (4th gen RDNA) (only Barrett kernels tested) */
381427
BARRETT69_MUL15,
382428
BARRETT70_MUL15,
383429
BARRETT71_MUL15,
@@ -398,7 +444,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
398444
UNKNOWN_KERNEL,
399445
UNKNOWN_KERNEL,
400446
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
401-
{
447+
{
402448
/* GPU_APU, (BeaverCreek=???, v=4) */
403449
BARRETT70_MUL15, // "cl_barrett15_70" (79.66 M/s)
404450
BARRETT69_MUL15, // "cl_barrett15_69" (78.40 M/s)
@@ -421,7 +467,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
421467
UNKNOWN_KERNEL,
422468
UNKNOWN_KERNEL,
423469
UNKNOWN_KERNEL },
424-
{
470+
{
425471
/* GPU_CPU, i7 620M @ 3.06GHz */
426472
MG62, // "cl_mg_62" (9.60 M/s)
427473
BARRETT77_MUL32, // "cl_barrett32_77" (5.54 M/s)
@@ -433,7 +479,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
433479
BARRETT70_MUL15, // "cl_barrett15_70" (3.60 M/s)
434480
BARRETT92_MUL32, // "cl_barrett32_92" (3.56 M/s)
435481
BARRETT71_MUL15, // "cl_barrett15_71" (3.43 M/s)
436-
// BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
482+
// BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
437483
BARRETT73_MUL15, // "cl_barrett15_73" (3.07 M/s)
438484
BARRETT74_MUL15, // "cl_barrett15_74"
439485
BARRETT82_MUL15, // "cl_barrett15_82" (2.72 M/s)

src/mfakto.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -749,17 +749,19 @@ void set_gpu_type()
749749
mystuff.gpu_type = GPU_GCNF;
750750
}
751751
else if (STM("gfx101") || // RDNA1
752-
STM("gfx103") || // RDNA2
753752

754753
PAT("RX [56][0-9][0-9][0-9]") // Model
755754
// Also known as 6[0-9]0M, but might be too vague to match
756755
)
757756
{
758757
mystuff.gpu_type = GPU_RDNA;
758+
}
759+
else if (STM("gfx103")) // RDNA2
760+
{
761+
mystuff.gpu_type = GPU_RDNA2;
759762
}
760763
else if (STM("gfx110") || // Catch-all RDNA3
761764
STM("gfx115") || // Catch-all RDNA3.5
762-
STM("gfx120") || // Catch-all RDNA4
763765

764766
PAT("RX [79][0-9][0-9][0-9]") || // Model
765767
PAT("80[456]0S") // Strix Halo, huge APU
@@ -768,6 +770,11 @@ void set_gpu_type()
768770
{
769771
mystuff.gpu_type = GPU_RDNA3;
770772
}
773+
else if (STM("gfx120")) // Catch-all RDNA4
774+
{
775+
mystuff.gpu_type = GPU_RDNA4;
776+
}
777+
771778
else if (STM("Cayman") || // 6950, 6970
772779
STM("Devastator") || // 7xx0D (iGPUs of A4/6/8/10)
773780
STM("Scrapper") || // 7xx0G (iGPUs of A4/6/8/10)

src/my_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,9 @@ enum GPU_types
176176
GPU_GCN5,
177177
GPU_GCNF, // R VII
178178
GPU_RDNA,
179+
GPU_RDNA2,
179180
GPU_RDNA3,
181+
GPU_RDNA4,
180182
GPU_APU,
181183
GPU_CPU,
182184
GPU_NVIDIA,

0 commit comments

Comments
 (0)