@@ -119,6 +119,59 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
119119#endif
120120}
121121
122+ #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
123+ int ggml_cuda_parse_id (char devName[]) {
124+ // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
125+ // these values are not stable so this is susceptible to breakage
126+ // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
127+ int archMajor = 0x0 ;
128+ int archMinor = 0x0 ;
129+ int archNum = GGML_CUDA_CC_OFFSET_AMD;
130+ int archLen = strlen (devName);
131+ char archName[archLen + 1 ];
132+
133+ // strip leading 'gfx' while copying into our buffer
134+ if (archLen > 3 ) {
135+ strcpy (archName, &devName[3 ]);
136+ archLen -= 3 ;
137+ }
138+
139+ // trim trailing :xnack- or :sramecc- statuses
140+ archLen = strcspn (archName, " :" );
141+ archName[archLen] = ' \0 ' ;
142+
143+ // tease out the version information
144+ if (archLen > 8 ) {
145+ // versions labeled generic use '-' as delimiter
146+ // strip the trailing "-generic" then iterate through what remains
147+ if ((strstr (archName, " -generic" ))) {
148+ archName[archLen - 8 ] = ' \0 ' ;
149+ char * pch;
150+ if ((pch = strtok (archName, " -" ))) {
151+ archMajor = (int )strtoul (pch, 0 , 16 );
152+ if ((pch = strtok (NULL , " -" ))) {
153+ archMinor = 0x10 * (int )strtoul (pch, 0 , 16 );
154+ }
155+ }
156+ }
157+ } else if (archLen >= 3 ) {
158+ // last two digits should be the minor * 0x10 + stepping
159+ archMinor = (int )strtoul (&archName[archLen - 2 ], 0 , 16 );
160+ archName[archLen - 2 ] = ' \0 ' ;
161+
162+ // only the major version remains
163+ archMajor = (int )strtoul (archName, 0 , 16 );
164+ }
165+ archNum += archMajor * 0x100 ;
166+
167+ // be inclusive of the full gfx8 line for backward compatibility (Carrizu APUs, etc.)
168+ if (archMajor != 8 ) {
169+ archNum += archMinor;
170+ }
171+ return archNum;
172+ }
173+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
174+
122175static ggml_cuda_device_info ggml_cuda_init () {
123176#ifdef __HIP_PLATFORM_AMD__
124177 // Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -169,7 +222,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
169222
170223 cudaDeviceProp prop;
171224 CUDA_CHECK (cudaGetDeviceProperties (&prop, id));
172- GGML_LOG_INFO (" Device %d: %s, compute capability %d.%d, VMM: %s\n " , id, prop.name , prop.major , prop.minor , device_vmm ? " yes" : " no" );
173225
174226 info.default_tensor_split [id] = total_vram;
175227 total_vram += prop.totalGlobalMem ;
@@ -178,10 +230,29 @@ static ggml_cuda_device_info ggml_cuda_init() {
178230 info.devices [id].smpb = prop.sharedMemPerBlock ;
179231#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
180232 info.devices [id].smpbo = prop.sharedMemPerBlock ;
181- info.devices [id].cc = 100 *prop.major + 10 *prop.minor + GGML_CUDA_CC_OFFSET_AMD;
233+
234+ info.devices [id].cc = ggml_cuda_parse_id (prop.gcnArchName );
235+ if ((info.devices [id].cc & 0xff00 ) == 0x0 ) {
236+ GGML_LOG_WARN (" invalid architecture ID received for device %d %s: %s cc %d.%d\n " ,
237+ id, prop.name , prop.gcnArchName , prop.major , prop.minor );
238+
239+ // Fallback to prop.major and prop.minor
240+ if (prop.major > 0 ) {
241+ info.devices [id].cc = GGML_CUDA_CC_OFFSET_AMD + prop.major * 0x100 ;
242+
243+ // be inclusive of the full gfx8 line for backward compatibility (Carrizo APUs, etc.)
244+ if (prop.minor != 8 ) {
245+ info.devices [id].cc += prop.minor * 0x10 ;
246+ }
247+ }
248+ }
249+ GGML_LOG_INFO (" Device %d: %s, %s (0x%x), VMM: %s\n " ,
250+ id, prop.name , prop.gcnArchName , info.devices [id].cc & 0xffff , device_vmm ? " yes" : " no" );
182251#else
183252 info.devices [id].smpbo = prop.sharedMemPerBlockOptin ;
184253 info.devices [id].cc = 100 *prop.major + 10 *prop.minor ;
254+ GGML_LOG_INFO (" Device %d: %s, compute capability %d.%d, VMM: %s\n " ,
255+ id, prop.name , prop.major , prop.minor , device_vmm ? " yes" : " no" );
185256#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
186257 }
187258
0 commit comments