Skip to content

Commit 7bab618

Browse files
committed
Add new 0200 yaml.
1 parent bb13fee commit 7bab618

File tree

5 files changed

+138
-48
lines changed

5 files changed

+138
-48
lines changed

projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def generate_configs(self, arch, config_dir, list_stats, filter_metrics, sys_inf
7373
arch_panel_config = (
7474
config_dir if single_panel_config else config_dir.joinpath(arch)
7575
)
76-
ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
76+
ac.panel_configs = file_io.load_panel_configs(arch_panel_config, {})
7777

7878
# TODO: filter_metrics should/might be one per arch
7979
# print(ac)

projects/rocprofiler-compute/src/rocprof_compute_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def list_metrics(self):
226226
if arch in self.__supported_archs.keys():
227227
ac = schema.ArchConfig()
228228
ac.panel_configs = file_io.load_panel_configs(
229-
self.__args.config_dir.joinpath(arch)
229+
self.__args.config_dir.joinpath(arch), {}
230230
)
231231
sys_info = self.__mspec.get_class_members().iloc[0]
232232
parser.build_dfs(archConfigs=ac, filter_metrics=[], sys_info=sys_info)
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# TUI use only
2+
# NOTE: This is used as a TUI-only yaml file for the beta release of the new performance metric organization
3+
Panel Config:
4+
id: 200
5+
title: System Speed-of-Light
6+
metrics_description:
7+
Theoretical LDS Bandwidth: Indicates the maximum amount of bytes that could have
8+
been loaded from, stored to, or atomically updated in the LDS per unit time
9+
(see LDS Bandwidth example for more detail). This is also presented as a percent
10+
of the peak theoretical bandwidth achievable on the specific accelerator.
11+
vL1D Cache BW: The number of bytes looked up in the vL1D cache as a result of
12+
VMEM instructions per unit time. The number of bytes is calculated as the number
13+
of cache lines requested multiplied by the cache line size. This value does
14+
not consider partial requests, so e.g., if only a single value is requested
15+
in a cache line, the data movement will still be counted as a full cache line.
16+
This is also presented as a percent of the peak theoretical bandwidth achievable
17+
on the specific accelerator.
18+
L2 Cache BW: The number of bytes looked up in the L2 cache per unit time. The
19+
number of bytes is calculated as the number of cache lines requested multiplied
20+
by the cache line size. This value does not consider partial requests, so e.g.,
21+
if only a single value is requested in a cache line, the data movement will
22+
still be counted as a full cache line. This is also presented as a percent of
23+
the peak theoretical bandwidth achievable on the specific accelerator.
24+
L2-Fabric Read BW: "The number of bytes read by the L2 over the Infinity Fabric\u2122\
25+
\ interface per unit time. This is also presented as a percent of the peak theoretical\
26+
\ bandwidth achievable on the specific accelerator."
27+
L2-Fabric Write BW: The number of bytes sent by the L2 over the Infinity Fabric
28+
interface by write and atomic operations per unit time. This is also presented
29+
as a percent of the peak theoretical bandwidth achievable on the specific accelerator.
30+
Kernel Time: The total duration of the executed kernel.
31+
Kernel Time (Cycles): The total duration of the executed kernel in cycles.
32+
SIMD Utilization: The percent of total SIMD cycles in the kernel where any SIMD
33+
on a CU was actively doing any work, summed over all CUs. Low values (less than
34+
100%) indicate that the accelerator was not fully saturated by the kernel, or
35+
a potential load-imbalance issue.
36+
Clock Rate:
37+
data source:
38+
- metric_table:
39+
id: 201
40+
title: System Speed-of-Light
41+
header:
42+
metric: Metric
43+
value: Avg
44+
unit: Unit
45+
peak: Peak
46+
pop: Pct of Peak
47+
metric:
48+
Theoretical LDS Bandwidth:
49+
value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
50+
/ (End_Timestamp - Start_Timestamp)))
51+
unit: GB/s
52+
peak: (($max_sclk * $cu_per_gpu) * 0.128)
53+
pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($lds_banks_per_cu))
54+
/ (End_Timestamp - Start_Timestamp)) / (($max_sclk * $cu_per_gpu) * 0.00128)))
55+
vL1D Cache BW:
56+
value: AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / (End_Timestamp - Start_Timestamp)))
57+
unit: GB/s
58+
peak: ((($max_sclk / 1000) * 128) * $cu_per_gpu)
59+
pop: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 128) / (End_Timestamp
60+
- Start_Timestamp)))) / ((($max_sclk / 1000) * 128) * $cu_per_gpu))
61+
L2 Cache BW:
62+
value: AVG(((TCC_REQ_sum * 128) / (End_Timestamp - Start_Timestamp)))
63+
unit: GB/s
64+
peak: ((($max_sclk / 1000) * 128) * TO_INT($total_l2_chan))
65+
pop: ((100 * AVG(((TCC_REQ_sum * 128) / (End_Timestamp - Start_Timestamp))))
66+
/ ((($max_sclk / 1000) * 128) * TO_INT($total_l2_chan)))
67+
L2-Fabric Read BW:
68+
value: AVG((128 * TCC_BUBBLE_sum + 64 * (TCC_EA0_RDREQ_sum - TCC_BUBBLE_sum
69+
- TCC_EA0_RDREQ_32B_sum) + 32 * TCC_EA0_RDREQ_32B_sum) / (End_Timestamp
70+
- Start_Timestamp))
71+
unit: GB/s
72+
peak: $hbmBandwidth
73+
pop: ((100 * (AVG((128 * TCC_BUBBLE_sum + 64 * (TCC_EA0_RDREQ_sum - TCC_BUBBLE_sum
74+
- TCC_EA0_RDREQ_32B_sum) + 32 * TCC_EA0_RDREQ_32B_sum) / (End_Timestamp
75+
- Start_Timestamp)))) / $hbmBandwidth)
76+
L2-Fabric Write BW:
77+
value: AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum - TCC_EA0_WRREQ_64B_sum)
78+
* 32)) / (End_Timestamp - Start_Timestamp)))
79+
unit: GB/s
80+
peak: $hbmBandwidth
81+
pop: ((100 * AVG((((TCC_EA0_WRREQ_64B_sum * 64) + ((TCC_EA0_WRREQ_sum -
82+
TCC_EA0_WRREQ_64B_sum) * 32)) / (End_Timestamp - Start_Timestamp)))) /
83+
$hbmBandwidth)
84+
Kernel Time:
85+
avg: AVG((End_Timestamp - Start_Timestamp))
86+
unit: ns
87+
peak: None
88+
pop: None
89+
Kernel Time (Cycles):
90+
avg: AVG($GRBM_GUI_ACTIVE_PER_XCD)
91+
unit: Cycle
92+
peak: None
93+
pop: None
94+
SIMD Utilization:
95+
value: AVG(100 * SQ_BUSY_CU_CYCLES / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))
96+
unit: Pct
97+
peak: 100
98+
pop: AVG(100 * SQ_BUSY_CU_CYCLES / ($GRBM_GUI_ACTIVE_PER_XCD * $cu_per_gpu))
99+
Clock Rate:
100+
value: None
101+
unit: ns
102+
peak: None
103+
pop: None

projects/rocprofiler-compute/src/rocprof_compute_tui/views/main_view.py

Lines changed: 28 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -108,57 +108,48 @@ def run_analysis(self) -> None:
108108
self.top_kernel_to_df_list = []
109109

110110
if not self.selected_path:
111-
try:
112-
self.app.call_from_thread(
113-
lambda: self.query_one("#kernel-view").update_view(
114-
"No directory selected for analysis", LogLevel.ERROR
115-
)
111+
self.app.call_from_thread(
112+
lambda: self.query_one("#kernel-view").update_view(
113+
"No directory selected for analysis", LogLevel.ERROR
116114
)
117-
except:
118-
pass
115+
)
119116
return
120117

121118
try:
122119
self.logger.info(f"Starting analysis on: {self.selected_path}")
123-
try:
124-
self.app.call_from_thread(
125-
lambda: self.query_one("#kernel-view").update_view(
126-
f"Running analysis on: {self.selected_path}", LogLevel.SUCCESS
127-
)
120+
121+
self.app.call_from_thread(
122+
lambda: self.query_one("#kernel-view").update_view(
123+
f"Running analysis on: {self.selected_path}", LogLevel.SUCCESS
128124
)
129-
except:
130-
pass
125+
)
131126

132127
# 1. Create and TUI analyzer
133128
analyzer = tui_analysis(
134129
self.app.args, self.app.supported_archs, self.selected_path
135130
)
136131
analyzer.sanitize()
137132

138-
# 2. Load and process system info
133+
# 2. Load and process system info, then configure SoC
139134
sysinfo_path = Path(self.selected_path) / "sysinfo.csv"
140135
if not sysinfo_path.exists():
141136
raise FileNotFoundError(f"sysinfo.csv not found at {sysinfo_path}")
142-
143137
sys_info = file_io.load_sys_info(sysinfo_path).iloc[0].to_dict()
144-
145-
# 3. Configure SoC and run analysis
146138
self.app.load_soc_specs(sys_info)
139+
140+
# 3. Run analysis
147141
analyzer.set_soc(self.app.soc)
148142
analyzer.pre_processing()
149143
self.kernel_to_df_dict = analyzer.run_kernel_analysis()
150144
self.top_kernel_to_df_list = analyzer.run_top_kernel()
151145

152146
if not self.kernel_to_df_dict or not self.top_kernel_to_df_list:
153-
try:
154-
self.app.call_from_thread(
155-
lambda: self.query_one("#kernel-view").update_view(
156-
"Analysis completed but not all data was returned",
157-
LogLevel.WARNING,
158-
)
147+
self.app.call_from_thread(
148+
lambda: self.query_one("#kernel-view").update_view(
149+
"Analysis completed but not all data was returned",
150+
LogLevel.WARNING,
159151
)
160-
except:
161-
pass
152+
)
162153
else:
163154
self.app.call_from_thread(self.refresh_results)
164155
self.logger.info("Kernel Analysis completed successfully")
@@ -169,30 +160,22 @@ def run_analysis(self) -> None:
169160

170161
error_msg = f"Analysis failed: {str(e)}"
171162
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
172-
try:
173-
self.app.call_from_thread(
174-
lambda: self.query_one("#kernel-view").update_view(
175-
error_msg, LogLevel.ERROR
176-
)
163+
self.app.call_from_thread(
164+
lambda: self.query_one("#kernel-view").update_view(
165+
error_msg, LogLevel.ERROR
177166
)
178-
except:
179-
pass
167+
)
180168

181169
def refresh_results(self) -> None:
182-
try:
183-
kernel_view = self.query_one("#kernel-view")
184-
if kernel_view and self.kernel_to_df_dict and self.top_kernel_to_df_list:
185-
kernel_view.update_results(
186-
self.kernel_to_df_dict, self.top_kernel_to_df_list
187-
)
188-
self.logger.success("Results displayed successfully.")
189-
else:
190-
self.logger.error("Kernel view not found or no data available")
191-
except Exception as e:
192-
self.logger.error(f"Error refreshing results: {str(e)}")
170+
kernel_view = self.query_one("#kernel-view")
171+
if kernel_view:
172+
kernel_view.update_results(self.kernel_to_df_dict, self.top_kernel_to_df_list)
173+
self.logger.success("Results displayed successfully.")
174+
else:
175+
self.logger.error("Kernel view not found or no data available")
193176

194177
def refresh_view(self) -> None:
195-
if self.top_kernel_to_df_list:
178+
if self.kernel_to_df_dict and self.top_kernel_to_df_list:
196179
self.refresh_results()
197180
else:
198181
self.logger.warning("No data available for refresh")

projects/rocprofiler-compute/src/utils/file_io.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,18 @@ def load_sys_info(f):
6262
return pd.read_csv(f)
6363

6464

65-
def load_panel_configs(dir):
65+
def load_panel_configs(dir, file_replacements={}):
6666
"""
6767
Load all panel configs from yaml file.
6868
"""
6969
d = {}
7070
for root, dirs, files in os.walk(dir):
7171
for f in files:
7272
if f.endswith(".yaml"):
73+
# Use replacement filename if specified, otherwise use original
74+
f = file_replacements.get(f, f)
75+
print(f"{f}")
76+
print(f"{str(Path(root).joinpath(f))}")
7377
with open(str(Path(root).joinpath(f))) as file:
7478
config = yaml.safe_load(file)
7579
# metric key can be None due to some metric tables not having any metrics

0 commit comments

Comments
 (0)