@@ -636,28 +636,36 @@ int init_CL(int num_streams, cl_int *devnumber)
636636}
637637
638638/*
639- * set_gpu_type
640- * try to extract the GPU type from the device info
641- * type informs our kernel selection due to perf and compilability issues and #defines
642- * such as USE_DP
643- *
644- * broadly speaking, each group of architectures with a certain amount of int32 mul
645- * relative to other parts should get its own type.
639+ * set_gpu_type: tries to determine the GPU type from the device info
646640 *
647- * the "APU" type is only for VLIW. newer GPUs seem to just match the arch. they mainly
648- * lose out on memory due to no L3, but we don't use much memory bus anyways.
641+ * - the GPU type is used in kernel selection to minimize compatibility and
642+ * performance issues, and enables certain compilation options such as USE_DP
643+ *
644+ * - broadly speaking, devices are grouped by architecture and by 32-bit
645+ * integer multiplication performance relative to the device as a whole;
646+ * each group should get its own type
647+ *
648+ * - the "APU" type is only for VLIW-based processors as newer GPUs seem to
649+ * just match the architecture. APUs depend more on system memory due to the
650+ * lack of an L3 cache, but mfakto does not use the memory bus much anyway
649651 */
650652void set_gpu_type ()
651653{
652654#define PAT (b ) patmatch(deviceinfo.d_name,b,0 )
653655#define STM (b ) strstr(deviceinfo.d_name,b)
656+
657+ // attempt to automatically detect the type of GPU
654658 if (mystuff.gpu_type == GPU_AUTO)
655659 {
656- // try to auto-detect the type of GPU
657- // There are basically two styles of names:
658- // 1) Model: "Radeon HD 7770", "Radeon R7 260X", "Radeon R9 290X", "Radeon RX 6800 XT"
659- // 2) Codename: "Capeverde", "Pitcairn", "Tahiti", "Hawaii", "Ellesmere", "gfx900", "gfx1010"
660- // The first type is typical of mac and Windows. The second type is typical of Linux.
660+ // clGetDeviceInfo() basically returns two styles of device names:
661+ //
662+ // 1) models, such as: "Radeon HD 7770", "Radeon R7 260X",
663+ // "Radeon R9 290X" and "Radeon RX 6800 XT"
664+ // 2) codenames, such as: "Capeverde", "Pitcairn", "Tahiti", "Hawaii",
665+ // "Ellesmere", "gfx900" and "gfx1010"
666+ //
667+ // macOS and Windows drivers typically report the model name, and Linux
668+ // drivers tend to report the internal codename.
661669
662670 if (STM (" Capeverde" ) || // 7730, 7750, 7770, 8760, 8740, R7 250X
663671 STM (" Pitcairn" ) || // 7850, 7870, 8870
@@ -674,7 +682,7 @@ void set_gpu_type()
674682 STM (" Antigua" ) || // R9 380(X)
675683 STM (" Kalindi" ) || // GCN APU, Kabini, R7 ???
676684 PAT (" D[357]00" ) || // FirePro D-series
677- PAT (" HD [78][0-7][0-9][0-9]" )
685+ PAT (" HD [78][0-7][0-9][0-9]" )
678686 )
679687 {
680688 mystuff.gpu_type = GPU_GCN;
@@ -733,7 +741,7 @@ void set_gpu_type()
733741 PAT (" gfx90[ac]" ) || // CDNA2: MI210, MI250(X)
734742 STM (" gfx942" ) || // CDNA3: MI300(A/X)
735743 STM (" gfx950" ) || // CDNA4: MI350/355
736-
744+
737745 PAT (" MI[5-6]0" ) || // MI50, MI60
738746 PAT (" MI[0-9][0-9][0-9]" ) // Any MI with three digits
739747 )
0 commit comments