@@ -186,12 +186,73 @@ def populate_mspec(self):
186186 self ._mspec .gpu_model = mi_gpu_specs .get_gpu_model (
187187 self ._mspec .gpu_arch , self ._mspec .gpu_chip_id
188188 )
189+
190+ if not self ._mspec .gpu_model :
191+ self ._mspec .gpu_model = self .detect_gpu_model (self ._mspec .gpu_arch )
192+
189193 self ._mspec .num_xcd = str (
190194 mi_gpu_specs .get_num_xcds (
191- self ._mspec .gpu_model , self ._mspec .compute_partition
195+ self ._mspec .gpu_arch , self . _mspec . gpu_model , self ._mspec .compute_partition
192196 )
193197 )
194198
@demarcate
def detect_gpu_model(self, gpu_arch):
    """
    Detect the GPU model from the output of 'amd-smi static'.

    Several name fields of the 'amd-smi static' report are tried in order
    (market name, VBIOS name, product name); the first field that yields a
    match is used. The matched name is lower-cased, given an MI300 revision
    suffix where applicable, and validated against the keys of
    mi_gpu_specs.get_num_xcds_dict().

    Args:
        gpu_arch: Architecture string such as "gfx942", used only to pick
            the MI300 revision suffix. A missing (None/empty) architecture
            is tolerated and simply results in no suffix being applied.

    Returns:
        The upper-cased model name when it is a known model, otherwise
        None (a warning is logged in that case).
    """

    from utils.specs import run, search

    # TODO: use amd-smi python api when available
    amd_smi_static = run(["amd-smi", "static", "--gpu=0"], exit_on_error=True)

    # Purposely search for patterns without variants suffix to try and match
    # a known GPU model.
    #
    # The prefix alternation is wrapped in a non-capturing group so that the
    # lowercase spelling also captures the digits/letters that follow it
    # (previously "mi|MI\d*..." captured the bare string "mi"). At least one
    # digit is required so that the greedy ".*" cannot settle on an unrelated
    # word that merely contains "mi". Exactly one capturing group is kept, as
    # in the original patterns.
    model_token = r"((?:mi|MI)\d+[a-zA-Z]*)"
    detection_methods = [
        {"name": "Market Name", "pattern": r"MARKET_NAME:\s*.*" + model_token},
        {"name": "VBIOS Name", "pattern": r"NAME:\s*.*" + model_token},
        {"name": "Product Name", "pattern": r"PRODUCT_NAME:\s*.*" + model_token},
    ]

    gpu_model = None
    for method in detection_methods:
        console_log(f"Determining GPU model using {method['name']}.")
        gpu_model = search(method["pattern"], amd_smi_static)
        if gpu_model:
            break

    if not gpu_model:
        console_warning("Unable to determine the GPU model.")
        return None

    # Guard against a missing architecture instead of raising AttributeError.
    arch = gpu_arch.lower() if gpu_arch else ""
    gpu_model = self._adjust_mi300_model(gpu_model.lower(), arch)

    if gpu_model not in mi_gpu_specs.get_num_xcds_dict():
        console_warning(f"Unknown GPU model detected: '{gpu_model}'.")
        return None

    return gpu_model.upper()
242+
243+ def _adjust_mi300_model (self , gpu_model , gpu_arch ):
244+ """
245+ Applies specific adjustments for MI300 series GPU models based on architecture.
246+ """
247+
248+ if gpu_model in ["mi300a" , "mi300x" ]:
249+ if gpu_arch in ["gfx940" , "gfx941" ]:
250+ gpu_model += "_a0"
251+ elif gpu_arch == "gfx942" :
252+ gpu_model += "_a1"
253+
254+ return gpu_model
255+
195256 @demarcate
196257 def detect_counters (self ):
197258 """
0 commit comments