Skip to content

Commit f0fad19

Browse files
authored
Add chip specs (#681)
* Add perfmon config spec, enhance memory partition info. * Add gfx950 perfmon config. * Add High Freq variants in gfx942. * Add backup detection methods for gpu model. * Improve get_num_xcds logic by adding detection of 1to1 arch-to-compute_partition logic. * Add default compute partition settings spx:8 for when gpu_model=None. * Update gpu spec tests. * Add backup compute partition detection. --------- Signed-off-by: xuchen-amd <xuchen@amd.com>
1 parent 45296ce commit f0fad19

File tree

13 files changed

+526
-176
lines changed

13 files changed

+526
-176
lines changed

src/rocprof_compute_soc/soc_base.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,73 @@ def populate_mspec(self):
186186
self._mspec.gpu_model = mi_gpu_specs.get_gpu_model(
187187
self._mspec.gpu_arch, self._mspec.gpu_chip_id
188188
)
189+
190+
if not self._mspec.gpu_model:
191+
self._mspec.gpu_model = self.detect_gpu_model(self._mspec.gpu_arch)
192+
189193
self._mspec.num_xcd = str(
190194
mi_gpu_specs.get_num_xcds(
191-
self._mspec.gpu_model, self._mspec.compute_partition
195+
self._mspec.gpu_arch, self._mspec.gpu_model, self._mspec.compute_partition
192196
)
193197
)
194198

199+
@demarcate
def detect_gpu_model(self, gpu_arch):
    """
    Detect the GPU model from the output of 'amd-smi static'.

    Several fields of the report are tried in order (market name, VBIOS
    name, product name); the first one that yields an MI-series token wins.
    The token is lower-cased, passed through _adjust_mi300_model() and then
    validated against the keys of mi_gpu_specs.get_num_xcds_dict().

    Args:
        gpu_arch: Architecture string (e.g. "gfx942"); forwarded, lower-cased,
            to _adjust_mi300_model().

    Returns:
        The upper-cased model name when it is a known key, otherwise None
        (a warning is logged in that case).
    """

    from utils.specs import run, search

    # TODO: use amd-smi python api when available
    # NOTE(review): exit_on_error=True presumably aborts when amd-smi fails,
    # even though this method is itself a fallback path - confirm intent.
    amd_smi_static = run(["amd-smi", "static", "--gpu=0"], exit_on_error=True)

    # Purposely search for patterns without variants suffix to try and match
    # a known GPU model.
    # The "mi"/"MI" prefix is grouped so the digits+letters tail applies to
    # both spellings, and at least one digit is required so that an ordinary
    # word containing "mi" cannot produce a truthy match that pre-empts the
    # remaining fallbacks. The model token stays in capture group 1.
    model_token = r"((?:mi|MI)\d+[a-zA-Z]*)"
    detection_methods = (
        ("Market Name", r"MARKET_NAME:\s*.*" + model_token),
        ("VBIOS Name", r"NAME:\s*.*" + model_token),
        ("Product Name", r"PRODUCT_NAME:\s*.*" + model_token),
    )

    gpu_model = None
    for method_name, pattern in detection_methods:
        console_log(f"Determining GPU model using {method_name}.")
        # NOTE(review): search() is assumed to return the captured model
        # token, or a falsy value when nothing matches - confirm in utils.specs.
        gpu_model = search(pattern, amd_smi_static)
        if gpu_model:
            break

    if not gpu_model:
        console_warning("Unable to determine the GPU model.")
        return None

    gpu_model = self._adjust_mi300_model(gpu_model.lower(), gpu_arch.lower())

    if gpu_model.lower() not in mi_gpu_specs.get_num_xcds_dict():
        console_warning(f"Unknown GPU model detected: '{gpu_model}'.")
        return None

    return gpu_model.upper()
242+
243+
def _adjust_mi300_model(self, gpu_model, gpu_arch):
244+
"""
245+
Applies specific adjustments for MI300 series GPU models based on architecture.
246+
"""
247+
248+
if gpu_model in ["mi300a", "mi300x"]:
249+
if gpu_arch in ["gfx940", "gfx941"]:
250+
gpu_model += "_a0"
251+
elif gpu_arch == "gfx942":
252+
gpu_model += "_a1"
253+
254+
return gpu_model
255+
195256
@demarcate
196257
def detect_counters(self):
197258
"""

src/rocprof_compute_soc/soc_gfx906.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,9 @@
2222
# SOFTWARE.
2323
##############################################################################
2424

25-
from pathlib import Path
26-
27-
import config
2825
from rocprof_compute_soc.soc_base import OmniSoC_Base
2926
from utils.logger import console_error, demarcate
27+
from utils.mi_gpu_spec import mi_gpu_specs
3028

3129

3230
class gfx906_soc(OmniSoC_Base):
@@ -35,20 +33,7 @@ def __init__(self, args, mspec):
3533
self.set_arch("gfx906")
3634
self.set_compatible_profilers(["rocprofv1", "rocscope"])
3735
# Per IP block max number of simultaneous counters. GFX IP Blocks
38-
self.set_perfmon_config(
39-
{
40-
"SQ": 8,
41-
"TA": 2,
42-
"TD": 2,
43-
"TCP": 4,
44-
"TCC": 4,
45-
"CPC": 2,
46-
"CPF": 2,
47-
"SPI": 2,
48-
"GRBM": 2,
49-
"GDS": 4,
50-
}
51-
)
36+
# Pass the perfmon config through unchanged; do not wrap it in a set literal.
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx906"))
5237

5338
# Set arch specific specs
5439
self._mspec._l2_banks = 16

src/rocprof_compute_soc/soc_gfx908.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import config
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from utils.logger import console_error, demarcate
30+
from utils.mi_gpu_spec import mi_gpu_specs
3031

3132

3233
class gfx908_soc(OmniSoC_Base):
@@ -37,20 +38,7 @@ def __init__(self, args, mspec):
3738
["rocprofv1", "rocscope", "rocprofv3", "rocprofiler-sdk"]
3839
)
3940
# Per IP block max number of simultaneous counters. GFX IP Blocks
40-
self.set_perfmon_config(
41-
{
42-
"SQ": 8,
43-
"TA": 2,
44-
"TD": 2,
45-
"TCP": 4,
46-
"TCC": 4,
47-
"CPC": 2,
48-
"CPF": 2,
49-
"SPI": 2,
50-
"GRBM": 2,
51-
"GDS": 4,
52-
}
53-
)
41+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx908"))
5442

5543
# Set arch specific specs
5644
self._mspec._l2_banks = 32

src/rocprof_compute_soc/soc_gfx90a.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from roofline import Roofline
3030
from utils.logger import console_log, console_warning, demarcate
31+
from utils.mi_gpu_spec import mi_gpu_specs
3132
from utils.utils import mibench
3233

3334

@@ -50,20 +51,7 @@ def __init__(self, args, mspec):
5051
["rocprofv1", "rocscope", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
5152
)
5253
# Per IP block max number of simultaneous counters. GFX IP Blocks
53-
self.set_perfmon_config(
54-
{
55-
"SQ": 8,
56-
"TA": 2,
57-
"TD": 2,
58-
"TCP": 4,
59-
"TCC": 4,
60-
"CPC": 2,
61-
"CPF": 2,
62-
"SPI": 2,
63-
"GRBM": 2,
64-
"GDS": 4,
65-
}
66-
)
54+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx90a"))
6755
# Create roofline object if mode is provided; skip for --specs
6856
if hasattr(self.get_args(), "mode") and self.get_args().mode:
6957
self.roofline_obj = Roofline(args, self._mspec)

src/rocprof_compute_soc/soc_gfx940.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from roofline import Roofline
3030
from utils.logger import console_error, console_log, console_warning, demarcate
31+
from utils.mi_gpu_spec import mi_gpu_specs
3132
from utils.utils import mibench
3233

3334

@@ -50,20 +51,7 @@ def __init__(self, args, mspec):
5051
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
5152
)
5253
# Per IP block max number of simultaneous counters. GFX IP Blocks
53-
self.set_perfmon_config(
54-
{
55-
"SQ": 8,
56-
"TA": 2,
57-
"TD": 2,
58-
"TCP": 4,
59-
"TCC": 4,
60-
"CPC": 2,
61-
"CPF": 2,
62-
"SPI": 2,
63-
"GRBM": 2,
64-
"GDS": 4,
65-
}
66-
)
54+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx940"))
6755
# Create roofline object if mode is provided; skip for --specs
6856
if hasattr(self.get_args(), "mode") and self.get_args().mode:
6957
self.roofline_obj = Roofline(args, self._mspec)

src/rocprof_compute_soc/soc_gfx941.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from roofline import Roofline
3030
from utils.logger import console_error, console_log, console_warning, demarcate
31+
from utils.mi_gpu_spec import mi_gpu_specs
3132
from utils.utils import mibench
3233

3334

@@ -50,20 +51,7 @@ def __init__(self, args, mspec):
5051
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
5152
)
5253
# Per IP block max number of simultaneous counters. GFX IP Blocks
53-
self.set_perfmon_config(
54-
{
55-
"SQ": 8,
56-
"TA": 2,
57-
"TD": 2,
58-
"TCP": 4,
59-
"TCC": 4,
60-
"CPC": 2,
61-
"CPF": 2,
62-
"SPI": 2,
63-
"GRBM": 2,
64-
"GDS": 4,
65-
}
66-
)
54+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx941"))
6755
# Create roofline object if mode is provided; skip for --specs
6856
if hasattr(self.get_args(), "mode") and self.get_args().mode:
6957
self.roofline_obj = Roofline(args, self._mspec)

src/rocprof_compute_soc/soc_gfx942.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from roofline import Roofline
3030
from utils.logger import console_error, console_log, console_warning, demarcate
31+
from utils.mi_gpu_spec import mi_gpu_specs
3132
from utils.utils import mibench
3233

3334

@@ -50,20 +51,7 @@ def __init__(self, args, mspec):
5051
["rocprofv1", "rocprofv2", "rocprofv3", "rocprofiler-sdk"]
5152
)
5253
# Per IP block max number of simultaneous counters. GFX IP Blocks
53-
self.set_perfmon_config(
54-
{
55-
"SQ": 8,
56-
"TA": 2,
57-
"TD": 2,
58-
"TCP": 4,
59-
"TCC": 4,
60-
"CPC": 2,
61-
"CPF": 2,
62-
"SPI": 2,
63-
"GRBM": 2,
64-
"GDS": 4,
65-
}
66-
)
54+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx942"))
6755
# Create roofline object if mode is provided; skip for --specs
6856
if hasattr(self.get_args(), "mode") and self.get_args().mode:
6957
self.roofline_obj = Roofline(args, self._mspec)

src/rocprof_compute_soc/soc_gfx950.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
import config
2828
from rocprof_compute_soc.soc_base import OmniSoC_Base
2929
from roofline import Roofline
30-
from utils.logger import demarcate
31-
from utils.utils import console_error, console_log, console_warning, mibench
30+
from utils.logger import console_error, console_log, console_warning, demarcate
31+
from utils.mi_gpu_spec import mi_gpu_specs
32+
from utils.utils import mibench
3233

3334

3435
class gfx950_soc(OmniSoC_Base):
@@ -48,21 +49,7 @@ def __init__(self, args, mspec):
4849
)
4950
self.set_compatible_profilers(["rocprofv3", "rocprofiler-sdk"])
5051
# Per IP block max number of simultaneous counters. GFX IP Blocks
51-
self.set_perfmon_config(
52-
{
53-
"SQ": 8,
54-
"TA": 2,
55-
"TD": 2,
56-
"TCP": 4,
57-
"TCC": 4,
58-
"CPC": 2,
59-
"CPF": 2,
60-
"SPI": 2,
61-
"GRBM": 2,
62-
"GDS": 4,
63-
"TCC_channels": 16,
64-
}
65-
)
52+
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx950"))
6653
# Create roofline object if mode is provided; skip for --specs
6754
if hasattr(self.get_args(), "mode") and self.get_args().mode:
6855
self.roofline_obj = Roofline(args, self._mspec)

0 commit comments

Comments
 (0)