@@ -119,6 +119,55 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
119119#endif 
120120}
121121
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
// Converts an AMD target ID string into the integer architecture id stored in `cc`:
//
//     GGML_CUDA_CC_OFFSET_AMD + major * 0x100 + minor
//
// where `major` and `minor` are read as hexadecimal from the name, e.g.
//
//     "gfx90a:sramecc+:xnack-" -> GGML_CUDA_CC_OFFSET_AMD + 0x090a
//     "gfx1100"                -> GGML_CUDA_CC_OFFSET_AMD + 0x1100
//     "gfx10-3-generic"        -> GGML_CUDA_CC_OFFSET_AMD + 0x1030
//
// devName: NUL-terminated target ID (not modified by this function).
//
// Returns GGML_CUDA_CC_OFFSET_AMD with a zero major version when the name is NULL, does not
// start with "gfx", or cannot be parsed; callers can detect that by testing the major byte
// (bits 8..15) of the result.
static int ggml_cuda_parse_id(char devName[]) {
    // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
    // these values are not stable so this is susceptible to breakage
    // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
    static const char prefix[]  = "gfx";
    static const char generic[] = "-generic";
    const size_t prefixLen  = sizeof(prefix)  - 1;
    const size_t genericLen = sizeof(generic) - 1;

    int archMajor = 0x0;
    int archMinor = 0x0;

    // reject anything that is not a "gfx..." name instead of blindly skipping three characters
    // (this also covers empty / too short names, which previously read an uninitialized buffer)
    if (devName == NULL || strncmp(devName, prefix, prefixLen) != 0) {
        return GGML_CUDA_CC_OFFSET_AMD;
    }

    // skip the leading 'gfx'; the name ends at the first ':' (start of the
    // :xnack- / :sramecc- statuses) or at the end of the string
    const char * archName = devName + prefixLen;
    const size_t archLen  = strcspn(archName, ":");

    // tease out the version information
    if (archLen > genericLen) {
        // versions labeled generic use '-' as delimiter: "<major>[-<minor>]-generic"
        const char * suffix = archName + archLen - genericLen;
        if (strncmp(suffix, generic, genericLen) == 0) {
            // strtoul stops at the first non-hex character, i.e. at the next '-'
            char * end = NULL;
            archMajor = (int)strtoul(archName, &end, 16);

            // an optional "-<minor>" component may sit between the major and the suffix
            if (end < suffix && *end == '-') {
                archMinor = 0x10 * (int)strtoul(end + 1, NULL, 16);
            }
        }
    } else if (archLen >= 3) {
        // archLen <= genericLen in this branch, so sizeof(generic) bytes
        // (genericLen + terminator) are always enough for the copy
        char digits[sizeof(generic)];
        memcpy(digits, archName, archLen);
        digits[archLen] = '\0';

        // last two digits should be the minor * 0x10 + stepping
        archMinor = (int)strtoul(&digits[archLen - 2], NULL, 16);
        digits[archLen - 2] = '\0';

        // only the major version remains
        archMajor = (int)strtoul(digits, NULL, 16);
    }

    return GGML_CUDA_CC_OFFSET_AMD + archMajor * 0x100 + archMinor;
}
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
170+ 
122171static  ggml_cuda_device_info ggml_cuda_init () {
123172#ifdef  __HIP_PLATFORM_AMD__
124173    //  Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -169,7 +218,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
169218
170219        cudaDeviceProp prop;
171220        CUDA_CHECK (cudaGetDeviceProperties (&prop, id));
172-         GGML_LOG_INFO ("   Device %d: %s, compute capability %d.%d, VMM: %s\n "  , id, prop.name , prop.major , prop.minor , device_vmm ? " yes"   : " no"  );
173221
174222        info.default_tensor_split [id] = total_vram;
175223        total_vram += prop.totalGlobalMem ;
@@ -178,10 +226,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
178226        info.devices [id].smpb   = prop.sharedMemPerBlock ;
179227#if  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
180228        info.devices [id].smpbo  = prop.sharedMemPerBlock ;
181-         info.devices [id].cc  = 100 *prop.major  + 10 *prop.minor  + GGML_CUDA_CC_OFFSET_AMD;
229+ 
230+         info.devices [id].cc  = ggml_cuda_parse_id (prop.gcnArchName );
231+         if  ((info.devices [id].cc  & 0xff00 ) == 0x0 ) {
232+             GGML_LOG_WARN (" invalid architecture ID received for device %d %s: %s  cc %d.%d\n "  ,
233+                             id, prop.name , prop.gcnArchName , prop.major , prop.minor );
234+ 
235+             //  Fallback to prop.major and prop.minor
236+             if  (prop.major  > 0 ) {
237+                 info.devices [id].cc  = GGML_CUDA_CC_OFFSET_AMD + prop.major  * 0x100 ;
238+                 info.devices [id].cc  += prop.minor  * 0x10 ;
239+             }
240+         }
241+         GGML_LOG_INFO ("   Device %d: %s, %s (0x%x), VMM: %s\n "  ,
242+                         id, prop.name , prop.gcnArchName , info.devices [id].cc  & 0xffff , device_vmm ? " yes"   : " no"  );
182243#else 
183244        info.devices [id].smpbo  = prop.sharedMemPerBlockOptin ;
184245        info.devices [id].cc  = 100 *prop.major  + 10 *prop.minor ;
246+         GGML_LOG_INFO ("   Device %d: %s, compute capability %d.%d, VMM: %s\n "  ,
247+                         id, prop.name , prop.major , prop.minor , device_vmm ? " yes"   : " no"  );
185248#endif  //  defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
186249    }
187250
0 commit comments