@@ -119,6 +119,59 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
119119#endif 
120120}
121121
122+ #if  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
123+ int  ggml_cuda_parse_id (char  devName[]) {
124+     //  A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
125+     //  these values are not stable so this is susceptible to breakage
126+     //  https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
127+     int  archMajor = 0x0 ;
128+     int  archMinor = 0x0 ;
129+     int  archNum = GGML_CUDA_CC_OFFSET_AMD;
130+     int  archLen = strlen (devName);
131+     char  archName[archLen + 1 ];
132+ 
133+     //  strip leading 'gfx' while copying into our buffer
134+     if  (archLen > 3 ) {
135+         strcpy (archName, &devName[3 ]);
136+         archLen -= 3 ;
137+     }
138+ 
139+     //  trim trailing :xnack- or :sramecc- statuses
140+     archLen = strcspn (archName, " :"  );
141+     archName[archLen] = ' \0 '  ;
142+ 
143+     //  tease out the version information
144+     if  (archLen > 8 ) {
145+         //  versions labeled generic use '-' as delimiter
146+         //  strip the trailing "-generic" then iterate through what remains
147+         if  (strstr (archName, " -generic"  )) {
148+             archName[archLen - 8 ] = ' \0 '  ;
149+             char  * pch;
150+             if  (pch = strtok (archName, " -"  )) {
151+                 archMajor = (int )strtoul (pch, 0 , 16 );
152+                 if  (pch = strtok (NULL , " -"  )) {
153+                     archMinor = 0x10  * (int )strtoul (pch, 0 , 16 );
154+                 }
155+             }
156+         }
157+     } else  if  (archLen >= 3 ) {
158+         //  last two digits should be the minor * 0x10 + stepping
159+         archMinor = (int )strtoul (&archName[archLen - 2 ], 0 , 16 );
160+         archName[archLen - 2 ] = ' \0 '  ;
161+ 
162+         //  only the major version remains
163+         archMajor = (int )strtoul (archName, 0 , 16 );
164+     }
165+     archNum += archMajor * 0x100 ;
166+ 
167+     //  be inclusive of the full gfx8 line for backward compatibility (Carrizu APUs, etc.)
168+     if  (archMajor != 8 ) {
169+        archNum += archMinor;
170+     }
171+     return  archNum;
172+ }
173+ #endif  //  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
174+ 
122175static  ggml_cuda_device_info ggml_cuda_init () {
123176#ifdef  __HIP_PLATFORM_AMD__
124177    //  Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -169,7 +222,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
169222
170223        cudaDeviceProp prop;
171224        CUDA_CHECK (cudaGetDeviceProperties (&prop, id));
172-         GGML_LOG_INFO ("   Device %d: %s, compute capability %d.%d, VMM: %s\n "  , id, prop.name , prop.major , prop.minor , device_vmm ? " yes"   : " no"  );
173225
174226        info.default_tensor_split [id] = total_vram;
175227        total_vram += prop.totalGlobalMem ;
@@ -178,10 +230,29 @@ static ggml_cuda_device_info ggml_cuda_init() {
178230        info.devices [id].smpb   = prop.sharedMemPerBlock ;
179231#if  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
180232        info.devices [id].smpbo  = prop.sharedMemPerBlock ;
181-         info.devices [id].cc  = 100 *prop.major  + 10 *prop.minor  + GGML_CUDA_CC_OFFSET_AMD;
233+ 
234+         info.devices [id].cc  = ggml_cuda_parse_id (prop.gcnArchName );
235+         if  ((info.devices [id].cc  & 0xff00 ) == 0x0 ) {
236+             GGML_LOG_WARN (" invalid architecture ID received for device %d %s: %d  cc %d.%d\n "  ,
237+                             id, prop.name , prop.gcnArchName , prop.major , prop.minor );
238+ 
239+             //  Fallback to prop.major and prop.minor
240+             if  (prop.major  > 0 ) {
241+                 info.devices [id].cc  = GGML_CUDA_CC_OFFSET_AMD + prop.major  * 0x100 ;
242+ 
243+                 //  be inclusive of the full gfx8 line for backward compatibility (Carrizu APUs, etc.)
244+                 if  (prop.minor  != 8 ) {
245+                     info.devices [id].cc  += prop.minor  * 0x10 ;
246+                 }
247+             }
248+         }
249+         GGML_LOG_INFO ("   Device %d: %s, %s (0x%x), VMM: %s\n "  ,
250+                         id, prop.name , prop.gcnArchName , info.devices [id].cc  & 0xffff , device_vmm ? " yes"   : " no"  );
182251#else 
183252        info.devices [id].smpbo  = prop.sharedMemPerBlockOptin ;
184253        info.devices [id].cc  = 100 *prop.major  + 10 *prop.minor ;
254+         GGML_LOG_INFO ("   Device %d: %s, compute capability %d.%d, VMM: %s\n "  ,
255+                         id, prop.name , prop.major , prop.minor , device_vmm ? " yes"   : " no"  );
185256#endif  //  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
186257    }
187258
0 commit comments