@@ -119,6 +119,55 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
119119#endif
120120}
121121
122+ #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
123+ static int ggml_cuda_parse_id (char devName[]) {
124+ // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
125+ // these values are not stable so this is susceptible to breakage
126+ // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
127+ int archMajor = 0x0 ;
128+ int archMinor = 0x0 ;
129+ int archNum = GGML_CUDA_CC_OFFSET_AMD;
130+ int archLen = strlen (devName);
131+ char archName[archLen + 1 ];
132+
133+ // strip leading 'gfx' while copying into our buffer
134+ if (archLen > 3 ) {
135+ strcpy (archName, &devName[3 ]);
136+ archLen -= 3 ;
137+ }
138+
139+ // trim trailing :xnack- or :sramecc- statuses
140+ archLen = strcspn (archName, " :" );
141+ archName[archLen] = ' \0 ' ;
142+
143+ // tease out the version information
144+ if (archLen > 8 ) {
145+ // versions labeled generic use '-' as delimiter
146+ // strip the trailing "-generic" then iterate through what remains
147+ if ((strstr (archName, " -generic" ))) {
148+ archName[archLen - 8 ] = ' \0 ' ;
149+ char * pch;
150+ if ((pch = strtok (archName, " -" ))) {
151+ archMajor = (int )strtoul (pch, 0 , 16 );
152+ if ((pch = strtok (NULL , " -" ))) {
153+ archMinor = 0x10 * (int )strtoul (pch, 0 , 16 );
154+ }
155+ }
156+ }
157+ } else if (archLen >= 3 ) {
158+ // last two digits should be the minor * 0x10 + stepping
159+ archMinor = (int )strtoul (&archName[archLen - 2 ], 0 , 16 );
160+ archName[archLen - 2 ] = ' \0 ' ;
161+
162+ // only the major version remains
163+ archMajor = (int )strtoul (archName, 0 , 16 );
164+ }
165+ archNum += archMajor * 0x100 ;
166+ archNum += archMinor;
167+ return archNum;
168+ }
169+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
170+
122171static ggml_cuda_device_info ggml_cuda_init () {
123172#ifdef __HIP_PLATFORM_AMD__
124173 // Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -169,7 +218,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
169218
170219 cudaDeviceProp prop;
171220 CUDA_CHECK (cudaGetDeviceProperties (&prop, id));
172- GGML_LOG_INFO (" Device %d: %s, compute capability %d.%d, VMM: %s\n " , id, prop.name , prop.major , prop.minor , device_vmm ? " yes" : " no" );
173221
174222 info.default_tensor_split [id] = total_vram;
175223 total_vram += prop.totalGlobalMem ;
@@ -178,10 +226,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
178226 info.devices [id].smpb = prop.sharedMemPerBlock ;
179227#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
180228 info.devices [id].smpbo = prop.sharedMemPerBlock ;
181- info.devices [id].cc = 100 *prop.major + 10 *prop.minor + GGML_CUDA_CC_OFFSET_AMD;
229+
230+ info.devices [id].cc = ggml_cuda_parse_id (prop.gcnArchName );
231+ if ((info.devices [id].cc & 0xff00 ) == 0x0 ) {
232+ GGML_LOG_WARN (" invalid architecture ID received for device %d %s: %s cc %d.%d\n " ,
233+ id, prop.name , prop.gcnArchName , prop.major , prop.minor );
234+
235+ // Fallback to prop.major and prop.minor
236+ if (prop.major > 0 ) {
237+ info.devices [id].cc = GGML_CUDA_CC_OFFSET_AMD + prop.major * 0x100 ;
238+ info.devices [id].cc += prop.minor * 0x10 ;
239+ }
240+ }
241+ GGML_LOG_INFO (" Device %d: %s, %s (0x%x), VMM: %s\n " ,
242+ id, prop.name , prop.gcnArchName , info.devices [id].cc & 0xffff , device_vmm ? " yes" : " no" );
182243#else
183244 info.devices [id].smpbo = prop.sharedMemPerBlockOptin ;
184245 info.devices [id].cc = 100 *prop.major + 10 *prop.minor ;
246+ GGML_LOG_INFO (" Device %d: %s, compute capability %d.%d, VMM: %s\n " ,
247+ id, prop.name , prop.major , prop.minor , device_vmm ? " yes" : " no" );
185248#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
186249 }
187250
0 commit comments