Skip to content

Commit 980732c

Browse files
committed
Add histogram visualization for score metrics to the visualization class
1 parent ffa0aa2 commit 980732c

File tree

4 files changed

+168
-103
lines changed

4 files changed

+168
-103
lines changed

notebooks/Analysis/Requested and Used VRAM.ipynb

Lines changed: 21 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"id": "0",
66
"metadata": {},
77
"source": [
8-
"# <a id='toc1_'></a>[Analysis of Jobs that Requested and Used VRAM](#toc0_)\n",
8+
"# <a id='toc1_'></a>[Analysis of GPU Jobs that Requested and Used VRAM](#toc0_)\n",
99
"This notebook analyzes jobs that requested some VRAM, ran on GPU-type partitions, and used some GPU VRAM. It looks at these jobs, their corresponding users, and PI groups."
1010
]
1111
},
@@ -15,7 +15,7 @@
1515
"metadata": {},
1616
"source": [
1717
"**Table of contents**<a id='toc0_'></a> \n",
18-
"- [Analysis of Jobs that Requested and Used VRAM](#toc1_) \n",
18+
"- [Analysis of GPU Jobs that Requested and Used VRAM](#toc1_) \n",
1919
" - [Setup](#toc1_1_) \n",
2020
" - [Data Digestion and Preprocessing](#toc1_2_) \n",
2121
" - [Narrowing Dataset to Relevant Partition](#toc1_3_) \n",
@@ -61,9 +61,7 @@
6161
"# Import required modules\n",
6262
"import sys\n",
6363
"from pathlib import Path\n",
64-
"import pandas as pd\n",
65-
"import matplotlib.pyplot as plt\n",
66-
"import seaborn as sns"
64+
"import pandas as pd"
6765
]
6866
},
6967
{
@@ -102,9 +100,17 @@
102100
"\n",
103101
"from src.analysis import ResourceHoarding\n",
104102
"from src.analysis import efficiency_analysis as ea\n",
105-
"from src.visualization import JobsWithMetricsVisualizer, UsersWithMetricsVisualizer, PIGroupsWithMetricsVisualizer\n",
103+
"from src.visualization import (\n",
104+
" JobsWithMetricsVisualizer,\n",
105+
" UsersWithMetricsVisualizer,\n",
106+
" PIGroupsWithMetricsVisualizer,\n",
107+
")\n",
106108
"from src.config.enum_constants import ResourceHoardingDataFrameNameEnum\n",
107-
"from src.config.paths import PI_GROUPS_VISUALIZATION_DATA_DIR"
109+
"from src.config.paths import (\n",
110+
" JOBS_VISUALIZATION_DATA_DIR,\n",
111+
" USERS_VISUALIZATION_DATA_DIR,\n",
112+
" PI_GROUPS_VISUALIZATION_DATA_DIR,\n",
113+
")"
108114
]
109115
},
110116
{
@@ -239,7 +245,10 @@
239245
"# Plot top inefficient jobs by requested VRAM efficiency score, with VRAM-hours as labels\n",
240246
"jobs_with_metrics_visualizer = JobsWithMetricsVisualizer(inefficient_jobs_vram_hours.head(10))\n",
241247
"jobs_with_metrics_visualizer.visualize(\n",
242-
" column=\"requested_vram_efficiency_score\", bar_label_columns=[\"vram_hours\", \"allocated_vram\"], figsize=(10, 6)\n",
248+
" output_dir_path=JOBS_VISUALIZATION_DATA_DIR,\n",
249+
" column=\"requested_vram_efficiency_score\",\n",
250+
" bar_label_columns=[\"vram_hours\", \"allocated_vram\"],\n",
251+
" figsize=(10, 6),\n",
243252
")"
244253
]
245254
},
@@ -399,96 +408,10 @@
399408
"metadata": {},
400409
"outputs": [],
401410
"source": [
402-
"# Distribution of Avg Requested VRAM Efficiency Score (actual values; all are <= 0)\n",
403-
"# We keep scores as-is (negative or zero) and construct bins that respect the skew while\n",
404-
"# still giving higher resolution near zero using log-spaced absolute values mapped back to negatives.\n",
405-
"scores = inefficient_users_avg_req_vram_eff_score[\"avg_requested_vram_efficiency_score\"].dropna()\n",
406-
"print(len(scores), \"scores found for plotting.\")\n",
407-
"if scores.empty:\n",
408-
" print(\"No scores to plot.\")\n",
409-
"else:\n",
410-
" # If all scores are exactly zero, a histogram is not informative\n",
411-
" if (scores != 0).sum() == 0:\n",
412-
" print(\"All scores are zero; histogram not informative.\")\n",
413-
" else:\n",
414-
" import numpy as np\n",
415-
"\n",
416-
" fig, ax = plt.subplots(figsize=(8, 5))\n",
417-
"\n",
418-
" # Separate negatives (expected) from zeros\n",
419-
" neg_scores = scores[scores < 0]\n",
420-
" zero_scores = scores[scores == 0]\n",
421-
"\n",
422-
" min_abs = None # track smallest non-zero absolute value for symlog threshold\n",
423-
"\n",
424-
" # Build bins: if we have negative values, create log-spaced absolute edges then map back\n",
425-
" if not neg_scores.empty:\n",
426-
" n_bins = 100\n",
427-
" min_abs = neg_scores.abs().min()\n",
428-
" max_abs = neg_scores.abs().max()\n",
429-
" if min_abs == max_abs:\n",
430-
" # Degenerate case: all negative values identical -> fall back to linear bins\n",
431-
" bins = np.linspace(neg_scores.min(), 0, 20)\n",
432-
" else:\n",
433-
" abs_edges = np.logspace(np.log10(min_abs), np.log10(max_abs), n_bins)\n",
434-
" # Convert absolute edges to negative edges (descending), then append 0 as the last edge\n",
435-
" neg_edges = -abs_edges[::-1]\n",
436-
" bins = np.unique(np.concatenate([neg_edges, [0]])) # ensure strictly increasing\n",
437-
" else:\n",
438-
" # No negative values (only zeros) already handled earlier; fallback just in case\n",
439-
" bins = 3\n",
440-
"\n",
441-
" sns.histplot(scores, bins=bins, color=\"#1f77b4\", ax=ax)\n",
442-
" ax.set_xlabel(\"Avg Requested VRAM Efficiency Score (<= 0)\")\n",
443-
" ax.set_ylabel(\"Count\")\n",
444-
" ax.set_title(\"Distribution of Avg Requested VRAM Efficiency Scores (Actual Values, Log X)\")\n",
445-
"\n",
446-
" # Apply symmetrical log scale to x-axis to compress the long negative tail while keeping zero.\n",
447-
" # linthresh defines the range around zero that stays linear; choose smallest non-zero magnitude.\n",
448-
" if min_abs is not None and min_abs > 0:\n",
449-
" linthresh = min_abs\n",
450-
" else:\n",
451-
" linthresh = 1e-6 # fallback small threshold\n",
452-
" ax.set_xscale(\"symlog\", linthresh=linthresh, linscale=1.0, base=10)\n",
453-
"\n",
454-
" # Annotation: counts (negative & zero) and total\n",
455-
" neg_count = (scores < 0).sum()\n",
456-
" zero_count = (scores == 0).sum()\n",
457-
" total = len(scores)\n",
458-
" ax.text(\n",
459-
" 0.98,\n",
460-
" 0.95,\n",
461-
" (f\"Counts:\\nNegative: {neg_count}\\nZero: {zero_count}\\n# of Users: {total}\"),\n",
462-
" transform=ax.transAxes,\n",
463-
" ha=\"right\",\n",
464-
" va=\"top\",\n",
465-
" fontsize=9,\n",
466-
" bbox=dict(boxstyle=\"round,pad=0.3\", fc=\"white\", ec=\"gray\", alpha=0.9),\n",
467-
" )\n",
468-
"\n",
469-
" # Cumulative distribution (CDF) over actual score values\n",
470-
" counts, bin_edges = np.histogram(scores, bins=bins)\n",
471-
" cdf = np.cumsum(counts) / counts.sum()\n",
472-
" mids = (bin_edges[1:] + bin_edges[:-1]) / 2\n",
473-
" ax2 = ax.twinx()\n",
474-
" ax2.plot(mids, cdf, color=\"crimson\", marker=\"o\", linestyle=\"-\", linewidth=1, markersize=3)\n",
475-
" ax2.set_ylim(0, 1)\n",
476-
" ax2.set_ylabel(\"Cumulative Fraction\", color=\"crimson\")\n",
477-
" ax2.tick_params(axis=\"y\", colors=\"crimson\")\n",
478-
"\n",
479-
" # Ensure x-axis spans to (slightly) include zero for clarity\n",
480-
" left, right = ax.get_xlim()\n",
481-
" if right < 0:\n",
482-
" ax.set_xlim(left, 0)\n",
483-
"\n",
484-
" plt.tight_layout()\n",
485-
" plt.show()\n",
486-
"\n",
487-
"# Notes:\n",
488-
"# - We plot the actual (negative/zero) scores instead of absolute values.\n",
489-
"# - symlog x-scale provides a log-like compression for large negative magnitudes while keeping zero.\n",
490-
"# - linthresh picks the smallest non-zero magnitude so near-zero structure is visible.\n",
491-
"# - CDF is computed over actual values to show accumulation from most negative toward zero."
411+
"users_with_metrics_visualizer = UsersWithMetricsVisualizer(inefficient_users_avg_req_vram_eff_score)\n",
412+
"users_with_metrics_visualizer.visualize_metric_distribution(\n",
413+
" output_dir_path=USERS_VISUALIZATION_DATA_DIR, column=\"avg_requested_vram_efficiency_score\", figsize=(8, 5)\n",
414+
")"
492415
]
493416
},
494417
{

src/config/paths.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
DATA_DIR = (PROJECT_ROOT / "data").resolve()
1212
PREPROCESSING_DATA_DIR = (DATA_DIR / "preprocessing").resolve()
1313
VISUALIZATION_DATA_DIR = (DATA_DIR / "visualizations").resolve()
14+
JOBS_VISUALIZATION_DATA_DIR = (VISUALIZATION_DATA_DIR / "jobs").resolve()
15+
USERS_VISUALIZATION_DATA_DIR = (VISUALIZATION_DATA_DIR / "users").resolve()
1416
PI_GROUPS_VISUALIZATION_DATA_DIR = (VISUALIZATION_DATA_DIR / "pi_groups").resolve()
1517
REPORTS_DATA_DIR = (DATA_DIR / "reports").resolve()
1618

@@ -21,5 +23,7 @@
2123
DATA_DIR.mkdir(exist_ok=True)
2224
PREPROCESSING_DATA_DIR.mkdir(exist_ok=True)
2325
VISUALIZATION_DATA_DIR.mkdir(exist_ok=True)
26+
JOBS_VISUALIZATION_DATA_DIR.mkdir(exist_ok=True)
27+
USERS_VISUALIZATION_DATA_DIR.mkdir(exist_ok=True)
2428
PI_GROUPS_VISUALIZATION_DATA_DIR.mkdir(exist_ok=True)
2529
REPORTS_DATA_DIR.mkdir(exist_ok=True)

src/visualization/efficiency_metrics.py

Lines changed: 127 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,22 @@
44

55
from abc import ABC
66
from pathlib import Path
7-
from typing import Any
7+
from typing import Any, cast
88

99
import matplotlib.pyplot as plt
1010
import numpy as np
1111
import pandas as pd
1212
import seaborn as sns
1313
from pydantic import ValidationError
1414
from matplotlib.transforms import blended_transform_factory
15+
from .models import (
16+
EfficiencyMetricsKwargsModel,
17+
JobsWithMetricsKwargsModel,
18+
UsersWithMetricsKwargsModel,
19+
UsersWithMetricsHistKwargsModel,
20+
PIGroupsWithMetricsKwargsModel,
21+
)
1522

16-
17-
from .models import EfficiencyMetricsKwargsModel, UsersWithMetricsKwargsModel, PIGroupsWithMetricsKwargsModel
1823
from .visualization import DataVisualizer
1924

2025

@@ -54,7 +59,8 @@ def validate_visualize_kwargs(
5459
) from e
5560

5661
self.validate_column_argument(col_kwargs.column, validated_jobs_df)
57-
self.validate_columns(col_kwargs.bar_label_columns, validated_jobs_df)
62+
if hasattr(col_kwargs, "bar_label_columns") and col_kwargs.bar_label_columns is not None:
63+
self.validate_columns(col_kwargs.bar_label_columns, validated_jobs_df)
5864
self.validate_figsize(col_kwargs.figsize)
5965
return col_kwargs
6066

@@ -83,7 +89,8 @@ def visualize(self, output_dir_path: Path | None = None, **kwargs: dict[str, Any
8389
None: Displays the bar plot of jobs ranked by the specified efficiency metric.
8490
"""
8591
jobs_with_metrics_df = self.validate_dataframe()
86-
validated_kwargs = self.validate_visualize_kwargs(kwargs, jobs_with_metrics_df, EfficiencyMetricsKwargsModel)
92+
validated_kwargs = self.validate_visualize_kwargs(kwargs, jobs_with_metrics_df, JobsWithMetricsKwargsModel)
93+
validated_kwargs = cast(JobsWithMetricsKwargsModel, validated_kwargs)
8794
column = validated_kwargs.column
8895
bar_label_columns = validated_kwargs.bar_label_columns
8996
figsize = validated_kwargs.figsize
@@ -208,6 +215,7 @@ def visualize(self, output_dir_path: Path | None = None, **kwargs: dict[str, Any
208215
users_with_metrics_df = self.validate_dataframe()
209216
validated_kwargs = self.validate_visualize_kwargs(kwargs, users_with_metrics_df, UsersWithMetricsKwargsModel)
210217
column = validated_kwargs.column
218+
validated_kwargs = cast(UsersWithMetricsKwargsModel, validated_kwargs)
211219
bar_label_columns = validated_kwargs.bar_label_columns
212220
figsize = validated_kwargs.figsize
213221
output_dir_path = self.validate_output_dir(output_dir_path)
@@ -287,6 +295,119 @@ def _format_col(col: str) -> str:
287295
plt.savefig(output_dir_path / f"users_ranked_by_{column}_barplot.png", bbox_inches="tight")
288296
plt.show()
289297

298+
def visualize_metric_distribution(self, output_dir_path: Path | None = None, **kwargs: dict[str, Any]) -> None:
    """Plot the distribution of a non-positive efficiency metric across users.

    Draws a histogram of the metric on a symlog x-axis, using log-spaced bins
    over the negative values plus a final edge at zero, and overlays the
    cumulative fraction on a secondary y-axis. Missing values are dropped
    before plotting.

    Nothing is drawn (a message is printed instead) when the column has no
    values, when every value is zero, or when there are no negative values.

    Args:
        output_dir_path (Path | None): Directory in which to save the plot. It is
            passed through ``validate_output_dir``; the figure is saved only if
            the validated value is not None.
        **kwargs (dict[str, Any]): Keyword arguments for visualization, validated
            against ``UsersWithMetricsHistKwargsModel``.
            This can include:
            - column (str): The efficiency metric to visualize.
            - figsize (tuple[int | float, int | float]): Size of the figure.

    Returns:
        None: Displays the distribution plot of the specified efficiency metric.
    """
    users_with_metrics_df = self.validate_dataframe()
    validated_kwargs = self.validate_visualize_kwargs(
        kwargs,
        users_with_metrics_df,
        UsersWithMetricsHistKwargsModel,
    )
    column = validated_kwargs.column
    figsize = validated_kwargs.figsize
    output_dir_path = self.validate_output_dir(output_dir_path)

    # Drop missing values so they are neither binned nor counted in the annotation.
    scores = users_with_metrics_df[column].dropna()

    if scores.empty:
        print("No values to plot.")
        return
    # If all scores are exactly zero, a histogram is not informative.
    if (scores != 0).sum() == 0:
        print("All values are zero; histogram not informative.")
        return

    neg_scores = scores[scores < 0]
    if neg_scores.empty:
        # The binning below is built from the negative values only, so without
        # any there are no bin edges to use.
        # TODO (Arda): implement histogram for positive values
        if scores.min() > 0:
            print("All values are positive; histogram not implemented.")
        else:
            print("No negative values; histogram not implemented.")
        return

    # NOTE(review): bins span [most negative value, 0]; any positive values mixed
    # into the column fall outside the bins and are left out of the histogram and
    # CDF, although they still count towards the "# of Users" total.
    _, ax = plt.subplots(figsize=figsize)

    # Build bins: log-spaced absolute edges mapped back to negatives, so resolution
    # is higher near zero while the long negative tail is still covered.
    n_bins = 100
    min_abs = neg_scores.abs().min()  # smallest non-zero magnitude; also the symlog threshold
    max_abs = neg_scores.abs().max()
    if min_abs == max_abs:
        # Degenerate case: all negative values identical -> fall back to linear bins
        bins = np.linspace(neg_scores.min(), 0, 20)
    else:
        # geomspace keeps both endpoints exact; logspace(log10(a), log10(b)) can round
        # the outer edges inward and push the extreme values out of the bins.
        abs_edges = np.geomspace(min_abs, max_abs, n_bins)
        # Negate, append 0 as the last edge; np.unique sorts into strictly increasing order.
        bins = np.unique(np.concatenate([-abs_edges, [0.0]]))

    # NOTE(review): axis label and title are hard-coded for the avg requested VRAM
    # efficiency score even though `column` is configurable.
    sns.histplot(scores, bins=bins, color="#1f77b4", ax=ax)
    ax.set_xlabel("Avg Requested VRAM Efficiency Score (<= 0)")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Avg Requested VRAM Efficiency Scores (Actual Values, Log X)")

    # Symmetrical log scale compresses the long negative tail while keeping zero.
    # linthresh is the range around zero that stays linear; use the smallest
    # non-zero magnitude so near-zero structure stays visible.
    ax.set_xscale("symlog", linthresh=min_abs, linscale=1.0, base=10)

    # Annotation: counts (negative & zero) and total
    neg_count = (scores < 0).sum()
    zero_count = (scores == 0).sum()
    total = len(scores)
    ax.text(
        0.98,
        0.95,
        (f"Counts:\nNegative: {neg_count}\nZero: {zero_count}\n# of Users: {total}"),
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontsize=9,
        bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.9),
    )

    # Cumulative distribution (CDF) over actual score values, accumulating from
    # the most negative bin toward zero.
    counts, bin_edges = np.histogram(scores, bins=bins)
    cdf = np.cumsum(counts) / counts.sum()
    mids = (bin_edges[1:] + bin_edges[:-1]) / 2
    ax2 = ax.twinx()
    ax2.plot(mids, cdf, color="crimson", marker="o", linestyle="-", linewidth=1, markersize=3)
    ax2.set_ylim(0, 1)
    ax2.set_ylabel("Cumulative Fraction", color="crimson")
    ax2.tick_params(axis="y", colors="crimson")

    # Ensure x-axis spans to (slightly) include zero for clarity
    left, right = ax.get_xlim()
    if right < 0:
        ax.set_xlim(left, 0)

    plt.tight_layout()
    if output_dir_path is not None:
        plt.savefig(output_dir_path / f"user_{column}_distribution.png", bbox_inches="tight")
    plt.show()
410+
290411

291412
class PIGroupsWithMetricsVisualizer(EfficiencyMetricsVisualizer):
292413
"""Visualizer for PI groups with efficiency metrics.
@@ -318,6 +439,7 @@ def visualize(self, output_dir_path: Path | None = None, **kwargs: dict[str, Any
318439
PIGroupsWithMetricsKwargsModel,
319440
)
320441
column = validated_kwargs.column
442+
validated_kwargs = cast(PIGroupsWithMetricsKwargsModel, validated_kwargs)
321443
bar_label_columns = validated_kwargs.bar_label_columns
322444
figsize = validated_kwargs.figsize
323445
output_dir_path = self.validate_output_dir(output_dir_path)

src/visualization/models.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ class ColumnVisualizationKwargsModel(BaseModel):
1515
class EfficiencyMetricsKwargsModel(BaseModel):
    """Base keyword-argument model for efficiency metrics visualizations.

    Validation is strict and extra keyword arguments are allowed
    (``extra="allow"``).
    """

    model_config = ConfigDict(strict=True, extra="allow")

    # Name of the metric column to visualize.
    column: str
    # Figure size as (width, height).
    figsize: tuple[int | float, int | float] = (8, 10)
21+
22+
23+
class JobsWithMetricsKwargsModel(EfficiencyMetricsKwargsModel):
24+
"""Model for keyword arguments used in jobs with metrics visualizations."""
25+
1826
model_config = ConfigDict(strict=True, extra="forbid")
1927
column: str
2028
bar_label_columns: list[str] | None
@@ -30,6 +38,14 @@ class UsersWithMetricsKwargsModel(EfficiencyMetricsKwargsModel):
3038
figsize: tuple[int | float, int | float] = Field(default=(8, 8))
3139

3240

41+
class UsersWithMetricsHistKwargsModel(EfficiencyMetricsKwargsModel):
    """Keyword-argument model for user metrics histogram visualizations.

    Validation is strict and unknown keyword arguments are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(strict=True, extra="forbid")

    # Name of the metric column whose distribution is plotted.
    column: str
    # Figure size as (width, height).
    figsize: tuple[int | float, int | float] = (8, 5)
47+
48+
3349
class PIGroupsWithMetricsKwargsModel(EfficiencyMetricsKwargsModel):
3450
"""Model for keyword arguments used in PI group metrics visualizations."""
3551

0 commit comments

Comments
 (0)