@@ -31,115 +31,258 @@ def plot_resource_tracker(csv_file):
3131 # Read CSV file
3232 df = pd .read_csv (csv_file )
3333
34+ # Check if CSV has any data rows
35+ if len (df ) == 0 :
36+ print (f"Error: CSV file '{ csv_file } ' contains no data rows (only header)." )
37+ print ("Make sure the application runs with ZEL_ENABLE_SYSTEM_RESOURCE_TRACKER_CHECKER=1" )
38+ return
39+
3440 # Convert time from milliseconds to seconds
3541 df ['TimeSec' ] = df ['TimeMs' ] / 1000.0
3642
43+ # Flag potential memory leaks heuristically: a destroy/free call is expected
44+ # to release memory, so resident memory (VmRSS) should not grow across it.
45+ # Any destroy/free call whose VmRSS delta exceeds a threshold is recorded as a leak event.
46+ df ['PotentialLeak_KB' ] = 0
47+
48+ # Create/Destroy pairs to monitor for leaks
49+ create_destroy_pairs = {
50+ 'zeContextCreate' : 'zeContextDestroy' ,
51+ 'zeCommandQueueCreate' : 'zeCommandQueueDestroy' ,
52+ 'zeModuleCreate' : 'zeModuleDestroy' ,
53+ 'zeKernelCreate' : 'zeKernelDestroy' ,
54+ 'zeEventPoolCreate' : 'zeEventPoolDestroy' ,
55+ 'zeCommandListCreate' : 'zeCommandListDestroy' ,
56+ 'zeCommandListCreateImmediate' : 'zeCommandListDestroy' ,
57+ 'zeEventCreate' : 'zeEventDestroy' ,
58+ 'zeFenceCreate' : 'zeFenceDestroy' ,
59+ 'zeImageCreate' : 'zeImageDestroy' ,
60+ 'zeSamplerCreate' : 'zeSamplerDestroy' ,
61+ 'zeMemAllocDevice' : 'zeMemFree' ,
62+ 'zeMemAllocHost' : 'zeMemFree' ,
63+ 'zeMemAllocShared' : 'zeMemFree' ,
64+ }
65+
66+ # Scan every row and flag destroy/free calls during which VmRSS grew (create calls are not inspected)
67+ for idx , row in df .iterrows ():
68+ api = row ['APICall' ]
69+ # If this is a destroy operation, check if memory decreased
70+ if api in create_destroy_pairs .values ():
71+ # Memory should decrease on destroy - if it increased, it's a leak
72+ if row ['Delta_VmRSS_KB' ] > 100 : # Threshold: 100KB increase on destroy = leak
73+ df .at [idx , 'PotentialLeak_KB' ] = row ['Delta_VmRSS_KB' ]
74+
75+ # Calculate cumulative leaks over time
76+ df ['CumulativeLeak_KB' ] = df ['PotentialLeak_KB' ].cumsum ()
77+
3778 # Create figure with multiple subplots
38- fig , axes = plt .subplots (3 , 3 , figsize = (18 , 12 ))
79+ fig , axes = plt .subplots (4 , 3 , figsize = (18 , 16 ))
3980 fig .suptitle (f'Level Zero System Resource Tracking\n { Path (csv_file ).name } ' , fontsize = 16 )
4081
41- # Plot 1: Memory Usage Over Time (VmRSS, VmSize, VmData)
82+ # Plot 1: Memory Leak Detection Over Time
4283 ax1 = axes [0 , 0 ]
43- ax1 .plot (df ['TimeSec' ], df ['VmRSS_KB' ], label = 'VmRSS' , linewidth = 2 )
44- ax1 .plot (df ['TimeSec' ], df ['VmSize_KB' ], label = 'VmSize' , linewidth = 2 , alpha = 0.7 )
45- ax1 .plot (df ['TimeSec' ], df ['VmData_KB' ], label = 'VmData' , linewidth = 2 , alpha = 0.7 )
84+ # Plot cumulative leaks
85+ ax1 .plot (df ['TimeSec' ], df ['CumulativeLeak_KB' ] / 1024 , label = 'Cumulative Leaks' ,
86+ linewidth = 3 , color = 'red' )
87+ # Mark individual leak events
88+ leak_events = df [df ['PotentialLeak_KB' ] > 0 ]
89+ if not leak_events .empty :
90+ ax1 .scatter (leak_events ['TimeSec' ], leak_events ['CumulativeLeak_KB' ] / 1024 ,
91+ color = 'darkred' , s = 100 , marker = 'x' , linewidths = 3 ,
92+ label = f'Leak Events ({ len (leak_events )} )' , zorder = 5 )
4693 ax1 .set_xlabel ('Time (s)' )
47- ax1 .set_ylabel ('Memory (KB )' )
48- ax1 .set_title ('System Memory Usage Over Time' )
94+ ax1 .set_ylabel ('Memory Leaked (MB )' )
95+ ax1 .set_title ('Memory Leak Detection Over Time' , fontweight = 'bold' , color = 'darkred ' )
4996 ax1 .legend ()
5097 ax1 .grid (True , alpha = 0.3 )
98+ if df ['CumulativeLeak_KB' ].max () > 0 :
99+ ax1 .set_facecolor ('#fff5f5' ) # Light red background if leaks detected
51100
52- # Plot 2: Memory Deltas (showing per-call changes )
101+ # Plot 2: Memory Usage Over Time (VmRSS, VmSize, VmData )
53102 ax2 = axes [0 , 1 ]
54- ax2 .plot (df ['TimeSec' ], df ['Delta_VmRSS_KB ' ] / 1024 , label = 'Delta VmRSS' , linewidth = 1.5 )
55- ax2 .plot (df ['TimeSec' ], df ['Delta_VmSize_KB ' ] / 1024 , label = 'Delta VmSize' , linewidth = 1.5 , alpha = 0.7 )
56- ax2 .axhline ( y = 0 , color = 'black' , linestyle = '-- ' , linewidth = 0.5 )
103+ ax2 .plot (df ['TimeSec' ], df ['VmRSS_KB ' ] / 1024 , label = 'VmRSS' , linewidth = 2 )
104+ ax2 .plot (df ['TimeSec' ], df ['VmSize_KB ' ] / 1024 , label = 'VmSize' , linewidth = 2 , alpha = 0.7 )
105+ ax2 .plot ( df [ 'TimeSec' ], df [ 'VmData_KB' ] / 1024 , label = 'VmData ' , linewidth = 2 , alpha = 0.7 )
57106 ax2 .set_xlabel ('Time (s)' )
58- ax2 .set_ylabel ('Memory Change (MB)' )
59- ax2 .set_title ('Memory Deltas Per API Call ' )
107+ ax2 .set_ylabel ('Memory (MB)' )
108+ ax2 .set_title ('System Memory Usage Over Time ' )
60109 ax2 .legend ()
61110 ax2 .grid (True , alpha = 0.3 )
62111
63- # Plot 3: Level Zero Resource Counts
64- ax3 = axes [1 , 0 ]
112+ # Plot 3: Leak Events by API Type
113+ ax3 = axes [0 , 2 ]
114+ if not leak_events .empty :
115+ leak_by_api = leak_events .groupby ('APICall' )['PotentialLeak_KB' ].sum ().sort_values (ascending = True ) / 1024
116+ if not leak_by_api .empty :
117+ leak_by_api .plot (kind = 'barh' , ax = ax3 , color = 'orangered' )
118+ ax3 .set_xlabel ('Total Leaked Memory (MB)' )
119+ ax3 .set_title ('Memory Leaks by API Call' , fontweight = 'bold' )
120+ ax3 .grid (True , alpha = 0.3 , axis = 'x' )
121+ ax3 .set_facecolor ('#fff5f5' )
122+ else :
123+ ax3 .text (0.5 , 0.5 , 'No Leaks Detected!' , ha = 'center' , va = 'center' ,
124+ fontsize = 14 , color = 'green' , fontweight = 'bold' , transform = ax3 .transAxes )
125+ ax3 .set_title ('Memory Leaks by API Call' )
126+ ax3 .axis ('off' )
127+ else :
128+ ax3 .text (0.5 , 0.5 , 'No Leaks Detected!' , ha = 'center' , va = 'center' ,
129+ fontsize = 14 , color = 'green' , fontweight = 'bold' , transform = ax3 .transAxes )
130+ ax3 .set_title ('Memory Leaks by API Call' )
131+ ax3 .axis ('off' )
132+
133+ # Plot 4: Memory Deltas (showing per-call changes)
134+ ax4 = axes [1 , 0 ]
135+ ax4 .plot (df ['TimeSec' ], df ['Delta_VmRSS_KB' ] / 1024 , label = 'Delta VmRSS' , linewidth = 1.5 )
136+ ax4 .plot (df ['TimeSec' ], df ['Delta_VmSize_KB' ] / 1024 , label = 'Delta VmSize' , linewidth = 1.5 , alpha = 0.7 )
137+ ax4 .axhline (y = 0 , color = 'black' , linestyle = '--' , linewidth = 0.5 )
138+ ax4 .set_xlabel ('Time (s)' )
139+ ax4 .set_ylabel ('Memory Change (MB)' )
140+ ax4 .set_title ('Memory Deltas Per API Call' )
141+ ax4 .legend ()
142+ ax4 .grid (True , alpha = 0.3 )
143+
144+ # Plot 5: Level Zero Resource Counts
145+ ax5 = axes [1 , 1 ]
146+ has_resources = False
65147 if df ['Contexts' ].max () > 0 :
66- ax3 .plot (df ['TimeSec' ], df ['Contexts' ], label = 'Contexts' , linewidth = 2 )
148+ ax5 .plot (df ['TimeSec' ], df ['Contexts' ], label = 'Contexts' , linewidth = 2 )
149+ has_resources = True
67150 if df ['CommandQueues' ].max () > 0 :
68- ax3 .plot (df ['TimeSec' ], df ['CommandQueues' ], label = 'CommandQueues' , linewidth = 2 )
151+ ax5 .plot (df ['TimeSec' ], df ['CommandQueues' ], label = 'CommandQueues' , linewidth = 2 )
152+ has_resources = True
69153 if df ['Modules' ].max () > 0 :
70- ax3 .plot (df ['TimeSec' ], df ['Modules' ], label = 'Modules' , linewidth = 2 )
154+ ax5 .plot (df ['TimeSec' ], df ['Modules' ], label = 'Modules' , linewidth = 2 )
155+ has_resources = True
71156 if df ['Kernels' ].max () > 0 :
72- ax3 .plot (df ['TimeSec' ], df ['Kernels' ], label = 'Kernels' , linewidth = 2 )
73- ax3 .set_xlabel ('Time (s)' )
74- ax3 .set_ylabel ('Resource Count' )
75- ax3 .set_title ('L0 Resource Counts (Contexts, Queues, Modules, Kernels)' )
76- ax3 .legend ()
77- ax3 .grid (True , alpha = 0.3 )
78-
79- # Plot 4: Command Lists and Event Resources
80- ax4 = axes [1 , 1 ]
157+ ax5 .plot (df ['TimeSec' ], df ['Kernels' ], label = 'Kernels' , linewidth = 2 )
158+ has_resources = True
159+ ax5 .set_xlabel ('Time (s)' )
160+ ax5 .set_ylabel ('Resource Count' )
161+ ax5 .set_title ('L0 Resource Counts (Contexts, Queues, Modules, Kernels)' )
162+ if has_resources :
163+ ax5 .legend ()
164+ ax5 .grid (True , alpha = 0.3 )
165+
166+ # Plot 6: Command Lists and Event Resources
167+ ax6 = axes [1 , 2 ]
168+ has_cmd_resources = False
81169 if df ['CommandLists' ].max () > 0 :
82- ax4 .plot (df ['TimeSec' ], df ['CommandLists' ], label = 'CommandLists' , linewidth = 2 )
170+ ax6 .plot (df ['TimeSec' ], df ['CommandLists' ], label = 'CommandLists' , linewidth = 2 )
171+ has_cmd_resources = True
83172 if df ['EventPools' ].max () > 0 :
84- ax4 .plot (df ['TimeSec' ], df ['EventPools' ], label = 'EventPools' , linewidth = 2 )
173+ ax6 .plot (df ['TimeSec' ], df ['EventPools' ], label = 'EventPools' , linewidth = 2 )
174+ has_cmd_resources = True
85175 if df ['Events' ].max () > 0 :
86- ax4 .plot (df ['TimeSec' ], df ['Events' ], label = 'Events' , linewidth = 2 )
176+ ax6 .plot (df ['TimeSec' ], df ['Events' ], label = 'Events' , linewidth = 2 )
177+ has_cmd_resources = True
87178 if df ['Fences' ].max () > 0 :
88- ax4 .plot (df ['TimeSec' ], df ['Fences' ], label = 'Fences' , linewidth = 2 )
89- ax4 .set_xlabel ('Time (s)' )
90- ax4 .set_ylabel ('Resource Count' )
91- ax4 .set_title ('L0 Command Lists and Events' )
92- ax4 .legend ()
93- ax4 .grid (True , alpha = 0.3 )
179+ ax6 .plot (df ['TimeSec' ], df ['Fences' ], label = 'Fences' , linewidth = 2 )
180+ has_cmd_resources = True
181+ ax6 .set_xlabel ('Time (s)' )
182+ ax6 .set_ylabel ('Resource Count' )
183+ ax6 .set_title ('L0 Command Lists and Events' )
184+ if has_cmd_resources :
185+ ax6 .legend ()
186+ ax6 .grid (True , alpha = 0.3 )
94187
95- # Plot 5 : Total Memory Allocations
96- ax5 = axes [2 , 0 ]
97- ax5 .plot (df ['TimeSec' ], df ['TotalMemory_Bytes' ] / (1024 * 1024 ), label = 'Total Memory' ,
188+ # Plot 7 : Total Memory Allocations
189+ ax7 = axes [2 , 0 ]
190+ ax7 .plot (df ['TimeSec' ], df ['TotalMemory_Bytes' ] / (1024 * 1024 ), label = 'Total Memory' ,
98191 linewidth = 2 , color = 'red' )
99- ax5 .set_xlabel ('Time (s)' )
100- ax5 .set_ylabel ('Memory (MB)' )
101- ax5 .set_title ('Total L0 Memory Allocations' )
102- ax5 .legend ()
103- ax5 .grid (True , alpha = 0.3 )
192+ ax7 .set_xlabel ('Time (s)' )
193+ ax7 .set_ylabel ('Memory (MB)' )
194+ ax7 .set_title ('Total L0 Memory Allocations' )
195+ ax7 .legend ()
196+ ax7 .grid (True , alpha = 0.3 )
104197
105- # Plot 6 : API Call Distribution (top 10 most frequent)
106- ax6 = axes [2 , 1 ]
198+ # Plot 8 : API Call Distribution (top 10 most frequent)
199+ ax8 = axes [2 , 1 ]
107200 api_counts = df ['APICall' ].value_counts ().head (10 ).sort_values (ascending = True )
108- api_counts .plot (kind = 'barh' , ax = ax6 , color = 'steelblue' )
109- ax6 .set_xlabel ('Call Count' )
110- ax6 .set_title ('Top 10 Most Frequent API Calls' )
111- ax6 .grid (True , alpha = 0.3 , axis = 'x' )
201+ if len (api_counts ) > 0 :
202+ api_counts .plot (kind = 'barh' , ax = ax8 , color = 'steelblue' )
203+ ax8 .set_xlabel ('Call Count' )
204+ ax8 .set_title ('Top 10 Most Frequent API Calls' )
205+ ax8 .grid (True , alpha = 0.3 , axis = 'x' )
206+ else :
207+ ax8 .text (0.5 , 0.5 , 'No API calls recorded' , ha = 'center' , va = 'center' ,
208+ fontsize = 12 , transform = ax8 .transAxes )
209+ ax8 .set_title ('Top 10 Most Frequent API Calls' )
210+ ax8 .axis ('off' )
112211
113- # Plot 7 : Top 10 API Calls by Memory Usage
114- ax7 = axes [2 , 2 ]
212+ # Plot 9 : Top 10 API Calls by Memory Usage
213+ ax9 = axes [2 , 2 ]
115214 # Calculate total memory delta per API call type
116215 memory_by_api = (df .groupby ('APICall' )['Delta_VmRSS_KB' ].sum () / 1024 ).sort_values (ascending = True ).tail (10 )
117- memory_by_api .plot (kind = 'barh' , ax = ax7 , color = 'coral' )
118- ax7 .set_xlabel ('Total Memory Delta (MB)' )
119- ax7 .set_title ('Top 10 API Calls by Memory Impact' )
120- ax7 .grid (True , alpha = 0.3 , axis = 'x' )
216+ if len (memory_by_api ) > 0 :
217+ memory_by_api .plot (kind = 'barh' , ax = ax9 , color = 'coral' )
218+ ax9 .set_xlabel ('Total Memory Delta (MB)' )
219+ ax9 .set_title ('Top 10 API Calls by Memory Impact' )
220+ ax9 .grid (True , alpha = 0.3 , axis = 'x' )
221+ else :
222+ ax9 .text (0.5 , 0.5 , 'No API calls recorded' , ha = 'center' , va = 'center' ,
223+ fontsize = 12 , transform = ax9 .transAxes )
224+ ax9 .set_title ('Top 10 API Calls by Memory Impact' )
225+ ax9 .axis ('off' )
121226
122- # Plot 8 : Memory Usage by API Call (average per call)
123- ax8 = axes [1 , 2 ]
227+ # Plot 10 : Memory Usage by API Call (average per call)
228+ ax10 = axes [3 , 0 ]
124229 # Calculate average memory delta per API call type
125230 avg_memory_by_api = (df .groupby ('APICall' )['Delta_VmRSS_KB' ].mean () / 1024 ).sort_values (ascending = True ).tail (10 )
126- avg_memory_by_api .plot (kind = 'barh' , ax = ax8 , color = 'mediumseagreen' )
127- ax8 .set_xlabel ('Avg Memory Delta per Call (MB)' )
128- ax8 .set_title ('Top 10 API Calls by Avg Memory per Call' )
129- ax8 .grid (True , alpha = 0.3 , axis = 'x' )
231+ if len (avg_memory_by_api ) > 0 :
232+ avg_memory_by_api .plot (kind = 'barh' , ax = ax10 , color = 'mediumseagreen' )
233+ ax10 .set_xlabel ('Avg Memory Delta per Call (MB)' )
234+ ax10 .set_title ('Top 10 API Calls by Avg Memory per Call' )
235+ ax10 .grid (True , alpha = 0.3 , axis = 'x' )
236+ else :
237+ ax10 .text (0.5 , 0.5 , 'No API calls recorded' , ha = 'center' , va = 'center' ,
238+ fontsize = 12 , transform = ax10 .transAxes )
239+ ax10 .set_title ('Top 10 API Calls by Avg Memory per Call' )
240+ ax10 .axis ('off' )
130241
131- # Plot 9 : Cumulative memory by API over time
132- ax9 = axes [0 , 2 ]
242+ # Plot 11 : Cumulative memory by API over time
243+ ax11 = axes [3 , 1 ]
133244 # Get top 5 API calls by total memory impact
134245 top5_apis = df .groupby ('APICall' )['Delta_VmRSS_KB' ].sum ().nlargest (5 ).index
135- for api in top5_apis :
136- api_data = df [df ['APICall' ] == api ]
137- ax9 .plot (api_data ['TimeSec' ], (api_data ['Delta_VmRSS_KB' ].cumsum () / 1024 ), label = api , linewidth = 2 )
138- ax9 .set_xlabel ('Time (s)' )
139- ax9 .set_ylabel ('Cumulative Memory Delta (MB)' )
140- ax9 .set_title ('Cumulative Memory Impact by Top 5 APIs' )
141- ax9 .legend (fontsize = 8 )
142- ax9 .grid (True , alpha = 0.3 )
246+ if len (top5_apis ) > 0 :
247+ for api in top5_apis :
248+ api_data = df [df ['APICall' ] == api ]
249+ ax11 .plot (api_data ['TimeSec' ], (api_data ['Delta_VmRSS_KB' ].cumsum () / 1024 ), label = api , linewidth = 2 )
250+ ax11 .set_xlabel ('Time (s)' )
251+ ax11 .set_ylabel ('Cumulative Memory Delta (MB)' )
252+ ax11 .set_title ('Cumulative Memory Impact by Top 5 APIs' )
253+ ax11 .legend (fontsize = 8 )
254+ ax11 .grid (True , alpha = 0.3 )
255+ else :
256+ ax11 .text (0.5 , 0.5 , 'No API calls recorded' , ha = 'center' , va = 'center' ,
257+ fontsize = 12 , transform = ax11 .transAxes )
258+ ax11 .set_title ('Cumulative Memory Impact by Top 5 APIs' )
259+ ax11 .axis ('off' )
260+
261+ # Plot 12: Leak detection timeline with annotations
262+ ax12 = axes [3 , 2 ]
263+ if not leak_events .empty :
264+ # Show individual leak magnitudes over time
265+ ax12 .bar (leak_events ['TimeSec' ], leak_events ['PotentialLeak_KB' ] / 1024 ,
266+ width = 0.01 , color = 'red' , alpha = 0.7 , label = 'Leak Magnitude' )
267+ ax12 .set_xlabel ('Time (s)' )
268+ ax12 .set_ylabel ('Leaked Memory (MB)' )
269+ ax12 .set_title ('Individual Leak Events Timeline' , fontweight = 'bold' )
270+ ax12 .legend ()
271+ ax12 .grid (True , alpha = 0.3 , axis = 'y' )
272+ ax12 .set_facecolor ('#fff5f5' )
273+
274+ # Add text annotation for total
275+ total_leaked = leak_events ['PotentialLeak_KB' ].sum () / 1024
276+ ax12 .text (0.95 , 0.95 , f'Total: { total_leaked :.2f} MB\n { len (leak_events )} events' ,
277+ transform = ax12 .transAxes , ha = 'right' , va = 'top' ,
278+ bbox = dict (boxstyle = 'round' , facecolor = 'white' , alpha = 0.8 ),
279+ fontsize = 10 , fontweight = 'bold' , color = 'darkred' )
280+ else :
281+ ax12 .text (0.5 , 0.5 , 'No Memory Leaks Detected!\n ✓ All resources properly cleaned up' ,
282+ ha = 'center' , va = 'center' , fontsize = 14 , color = 'green' ,
283+ fontweight = 'bold' , transform = ax12 .transAxes )
284+ ax12 .set_title ('Individual Leak Events Timeline' )
285+ ax12 .axis ('off' )
143286
144287 plt .tight_layout ()
145288
@@ -160,6 +303,24 @@ def plot_resource_tracker(csv_file):
160303 print (f"Total memory allocated: { df ['TotalMemory_Bytes' ].max ():.2f} bytes "
161304 f"({ df ['TotalMemory_Bytes' ].max ()/ (1024 * 1024 ):.2f} MB)" )
162305 print (f"Number of threads: { df ['Threads' ].max ()} " )
306+
307+ # Print leak detection summary
308+ print (f"\n === MEMORY LEAK DETECTION ===" )
309+ if df ['CumulativeLeak_KB' ].max () > 0 :
310+ print (f"⚠️ LEAKS DETECTED!" )
311+ print (f"Total leaked memory: { df ['CumulativeLeak_KB' ].max () / 1024 :.2f} MB ({ df ['CumulativeLeak_KB' ].max ():.2f} KB)" )
312+ print (f"Number of leak events: { len (leak_events )} " )
313+ if not leak_events .empty :
314+ print (f"\n Leak events by API:" )
315+ leak_summary = leak_events .groupby ('APICall' )['PotentialLeak_KB' ].agg (['count' , 'sum' , 'mean' ])
316+ leak_summary .columns = ['Count' , 'Total_KB' , 'Avg_KB' ]
317+ leak_summary = leak_summary .sort_values ('Total_KB' , ascending = False )
318+ for api , row in leak_summary .iterrows ():
319+ print (f" { api } : { row ['Count' ]} events, { row ['Total_KB' ]/ 1024 :.2f} MB total, { row ['Avg_KB' ]/ 1024 :.2f} MB avg" )
320+ else :
321+ print (f"✓ No memory leaks detected!" )
322+ print (f" All resources were properly cleaned up." )
323+
163324 print (f"\n Peak resource counts:" )
164325 print (f" Contexts: { df ['Contexts' ].max ()} " )
165326 print (f" CommandQueues: { df ['CommandQueues' ].max ()} " )
0 commit comments