|
102 | 102 | "\n", |
103 | 103 | "from src.analysis import ResourceHoarding\n", |
104 | 104 | "from src.analysis import efficiency_analysis as ea\n", |
105 | | - "from src.visualization import JobsWithMetricsVisualizer, UsersWithMetricsVisualizer\n", |
106 | | - "from src.config.enum_constants import ResourceHoardingDataFrameNameEnum" |
| 105 | + "from src.visualization import JobsWithMetricsVisualizer, UsersWithMetricsVisualizer, PIGroupsWithMetricsVisualizer\n", |
| 106 | + "from src.config.enum_constants import ResourceHoardingDataFrameNameEnum\n", |
| 107 | + "from src.config.paths import PI_GROUPS_VISUALIZATION_DATA_DIR" |
107 | 108 | ] |
108 | 109 | }, |
109 | 110 | { |
|
145 | 146 | "metadata": {}, |
146 | 147 | "outputs": [], |
147 | 148 | "source": [ |
148 | | - "analyzer = ResourceHoarding(\n", |
149 | | - " jobs_df=preprocessed_jobs_df\n", |
150 | | - ")" |
| 149 | + "analyzer = ResourceHoarding(jobs_df=preprocessed_jobs_df)" |
151 | 150 | ] |
152 | 151 | }, |
153 | 152 | { |
|
158 | 157 | "outputs": [], |
159 | 158 | "source": [ |
160 | 159 | "filtered_jobs = analyzer.filter_jobs_for_analysis(\n", |
161 | | - " gpu_mem_usage_filter={\"min\": 0, \"inclusive\": False},\n", |
162 | | - " requested_vram_filter={\"min\": 0, \"inclusive\": False}\n", |
| 160 | + " gpu_mem_usage_filter={\"min\": 0, \"inclusive\": False}, requested_vram_filter={\"min\": 0, \"inclusive\": False}\n", |
163 | 161 | ")\n", |
164 | 162 | "filtered_jobs" |
165 | 163 | ] |
|
241 | 239 | "# Plot top inefficient jobs by requested VRAM efficiency score, with VRAM-hours as labels\n", |
242 | 240 | "jobs_with_metrics_visualizer = JobsWithMetricsVisualizer(inefficient_jobs_vram_hours.head(10))\n", |
243 | 241 | "jobs_with_metrics_visualizer.visualize(\n", |
244 | | - " column=\"requested_vram_efficiency_score\",\n", |
245 | | - " bar_label_columns=[\"vram_hours\", \"allocated_vram\"],\n", |
246 | | - " figsize=(10, 6)\n", |
| 242 | + " column=\"requested_vram_efficiency_score\", bar_label_columns=[\"vram_hours\", \"allocated_vram\"], figsize=(10, 6)\n", |
247 | 243 | ")" |
248 | 244 | ] |
249 | 245 | }, |
|
381 | 377 | " ascending=True, # Sort by avg_requested_vram_efficiency_score in ascending order\n", |
382 | 378 | " filter_criteria={\n", |
383 | 379 | " \"avg_requested_vram_efficiency_score\": {\"max\": -10, \"inclusive\": True}, # score threshold\n", |
384 | | - " \"job_count\": {\"min\": 5, \"inclusive\": True} # minimum job count threshold\n", |
| 380 | + " \"job_count\": {\"min\": 5, \"inclusive\": True}, # minimum job count threshold\n", |
385 | 381 | " },\n", |
386 | 382 | ")\n", |
387 | 383 | "# Display top inefficient users by avg_requested_vram_efficiency_score\n", |
|
416 | 412 | " print(\"All scores are zero; histogram not informative.\")\n", |
417 | 413 | " else:\n", |
418 | 414 | " import numpy as np\n", |
| 415 | + "\n", |
419 | 416 | " fig, ax = plt.subplots(figsize=(8, 5))\n", |
420 | 417 | "\n", |
421 | 418 | " # Separate negatives (expected) from zeros\n", |
|
452 | 449 | " linthresh = min_abs\n", |
453 | 450 | " else:\n", |
454 | 451 | " linthresh = 1e-6 # fallback small threshold\n", |
455 | | - " ax.set_xscale('symlog', linthresh=linthresh, linscale=1.0, base=10)\n", |
| 452 | + " ax.set_xscale(\"symlog\", linthresh=linthresh, linscale=1.0, base=10)\n", |
456 | 453 | "\n", |
457 | 454 | " # Annotation: counts (negative & zero) and total\n", |
458 | 455 | " neg_count = (scores < 0).sum()\n", |
|
461 | 458 | " ax.text(\n", |
462 | 459 | " 0.98,\n", |
463 | 460 | " 0.95,\n", |
464 | | - " (\"Counts:\\n\" f\"Negative: {neg_count}\\n\" f\"Zero: {zero_count}\\n\" f\"# of Users: {total}\"),\n", |
| 461 | + " (f\"Counts:\\nNegative: {neg_count}\\nZero: {zero_count}\\n# of Users: {total}\"),\n", |
465 | 462 | " transform=ax.transAxes,\n", |
466 | 463 | " ha=\"right\",\n", |
467 | 464 | " va=\"top\",\n", |
|
477 | 474 | " ax2.plot(mids, cdf, color=\"crimson\", marker=\"o\", linestyle=\"-\", linewidth=1, markersize=3)\n", |
478 | 475 | " ax2.set_ylim(0, 1)\n", |
479 | 476 | " ax2.set_ylabel(\"Cumulative Fraction\", color=\"crimson\")\n", |
480 | | - " ax2.tick_params(axis='y', colors='crimson')\n", |
| 477 | + " ax2.tick_params(axis=\"y\", colors=\"crimson\")\n", |
481 | 478 | "\n", |
482 | 479 | " # Ensure x-axis spans to (slightly) include zero for clarity\n", |
483 | 480 | " left, right = ax.get_xlim()\n", |
|
491 | 488 | "# - We plot the actual (negative/zero) scores instead of absolute values.\n", |
492 | 489 | "# - symlog x-scale provides a log-like compression for large negative magnitudes while keeping zero.\n", |
493 | 490 | "# - linthresh picks the smallest non-zero magnitude so near-zero structure is visible.\n", |
494 | | - "# - CDF is computed over actual values to show accumulation from most negative toward zero.\n" |
| 491 | + "# - CDF is computed over actual values to show accumulation from most negative toward zero." |
495 | 492 | ] |
496 | 493 | }, |
497 | 494 | { |
|
620 | 617 | "metadata": {}, |
621 | 618 | "outputs": [], |
622 | 619 | "source": [ |
623 | | - "from matplotlib.transforms import blended_transform_factory\n", |
624 | | - "\n", |
625 | 620 | "inefficient_pis_avg_req_vram_eff_score = analyzer.sort_and_filter_records_with_metrics(\n", |
626 | 621 | " metrics_df_name_enum=ResourceHoardingDataFrameNameEnum.PI_GROUPS,\n", |
627 | 622 | " sorting_key=\"avg_requested_vram_efficiency_score\",\n", |
|
635 | 630 | "print(\"\\nTop inefficient PI Groups by Avg Requested VRAM Efficiency Score:\")\n", |
636 | 631 | "display(inefficient_pis_avg_req_vram_eff_score.head(10))\n", |
637 | 632 | "\n", |
638 | | - "# Take top N\n", |
639 | | - "top_pi_accounts = inefficient_pis_avg_req_vram_eff_score.head(10)\n", |
640 | | - "pi_accounts = top_pi_accounts[\"pi_account\"].tolist()\n", |
641 | | - "user_counts = top_pi_accounts.get(\"user_count\", [\"-\"] * len(top_pi_accounts)).tolist()\n", |
642 | | - "\n", |
643 | | - "xmin = top_pi_accounts[\"avg_requested_vram_efficiency_score\"].min()\n", |
644 | | - "# If the minimum value is negative, we need to adjust the heights of the bars\n", |
645 | | - "# to ensure they start from zero for better visualization.\n", |
646 | | - "# This is particularly useful for metrics like allocated VRAM efficiency score.\n", |
647 | | - "if xmin < 0:\n", |
648 | | - " col_heights = pd.Series(\n", |
649 | | - " [abs(xmin)] * len(top_pi_accounts[\"avg_requested_vram_efficiency_score\"]),\n", |
650 | | - " index=top_pi_accounts[\"avg_requested_vram_efficiency_score\"].index\n", |
651 | | - " ) - abs(top_pi_accounts[\"avg_requested_vram_efficiency_score\"])\n", |
652 | | - " print(f\"Minimum value for \\\"avg_requested_vram_efficiency_score\\\": {xmin}\")\n", |
653 | | - "else:\n", |
654 | | - " col_heights = top_pi_accounts[\"avg_requested_vram_efficiency_score\"]\n", |
655 | | - "\n", |
656 | | - "plt.figure(figsize=(10, 6))\n", |
657 | | - "plot_df = pd.DataFrame({\n", |
658 | | - " \"col_height\": col_heights.to_numpy(),\n", |
659 | | - " \"job_hours\": top_pi_accounts[\"pi_acc_job_hours\"],\n", |
660 | | - " \"pi_account\": top_pi_accounts[\"pi_account\"]\n", |
661 | | - "})\n", |
662 | | - "ax = sns.barplot(\n", |
663 | | - " y=top_pi_accounts[\"pi_account\"].tolist(),\n", |
664 | | - " x=col_heights.tolist(),\n", |
665 | | - " order=top_pi_accounts[\"pi_account\"].tolist(),\n", |
666 | | - " orient=\"h\",\n", |
667 | | - " palette=\"Blues_r\",\n", |
668 | | - " hue=top_pi_accounts[\"pi_account\"].tolist()\n", |
669 | | - ")\n", |
670 | | - "\n", |
671 | | - "# We'll replace the default tick labels with custom two-line labels placed OUTSIDE the left spine.\n", |
672 | | - "ax.set_yticks(range(len(pi_accounts)))\n", |
673 | | - "ax.set_yticklabels([]) # clear built-in labels\n", |
674 | | - "ax.set_ylabel(\"PI Account\")\n", |
675 | | - "\n", |
676 | | - "transform = blended_transform_factory(ax.transAxes, ax.transData) # x in axes fraction, y in data coords\n", |
677 | | - "x_outside = -0.02 # negative x fraction places text just left of spine; adjust if needed\n", |
678 | | - "line_gap = 0.4 # vertical separation between the two lines\n", |
679 | | - "for y_pos, (pi, uc) in enumerate(zip(pi_accounts, user_counts, strict=True)):\n", |
680 | | - " # First line (PI account) slightly above center\n", |
681 | | - " ax.text(\n", |
682 | | - " x_outside,\n", |
683 | | - " y_pos - line_gap / 2,\n", |
684 | | - " pi,\n", |
685 | | - " ha=\"right\",\n", |
686 | | - " va=\"center\",\n", |
687 | | - " transform=transform,\n", |
688 | | - " fontsize=10,\n", |
689 | | - " clip_on=False,\n", |
690 | | - " )\n", |
691 | | - " # Second line (Users) slightly below center\n", |
692 | | - " ax.text(\n", |
693 | | - " x_outside,\n", |
694 | | - " y_pos + line_gap / 2,\n", |
695 | | - " f\"# of Users: {uc}\",\n", |
696 | | - " ha=\"right\",\n", |
697 | | - " va=\"center\",\n", |
698 | | - " transform=transform,\n", |
699 | | - " fontsize=9,\n", |
700 | | - " color=\"dimgray\",\n", |
701 | | - " clip_on=False,\n", |
702 | | - " )\n", |
703 | | - "\n", |
704 | | - "# Y-axis label: place further left than custom tick labels.\n", |
705 | | - "ax.set_ylabel(\"PI Account\", rotation=90, labelpad=20)\n", |
706 | | - "# Position the label using axes fraction (x< x_outside)\n", |
707 | | - "ax.yaxis.set_label_coords(x_outside - 0.30, 0.5) \n", |
708 | | - "\n", |
709 | | - "# Hide y-axis tick labels (already blank) but keep small outward ticks if desired\n", |
710 | | - "ax.tick_params(axis='y', which='both', direction='out', length=4, pad=2)\n", |
711 | | - "plt.subplots_adjust(left=0.7)\n", |
712 | | - "\n", |
713 | | - "plt.xlabel(\"Average Requested VRAM Efficiency Score\")\n", |
714 | | - "plt.title(\"Top Inefficient PI Accounts by Average Requested VRAM Efficiency Score\")\n", |
715 | | - "\n", |
716 | | - "xmax = top_pi_accounts[\"avg_requested_vram_efficiency_score\"].max()\n", |
717 | | - "\n", |
718 | | - "# Set x-axis limit to 1.6 times the maximum value\n", |
719 | | - "# This ensures that the bars do not touch the right edge of the plot\n", |
720 | | - "xlim_multiplier = 1.6\n", |
721 | | - "xlim = abs(xmin) * xlim_multiplier if xmin < 0 else (xmax * xlim_multiplier if xmax > 0 else 1)\n", |
722 | | - "ax.set_xlim(0, xlim)\n", |
723 | | - "# If the minimum value is negative, we need to adjust the x-ticks accordingly\n", |
724 | | - "if xmin < 0:\n", |
725 | | - " num_xticks = max(4, min(12, int(abs(xmin) // (xlim * 0.10)) + 1))\n", |
726 | | - " xticks = np.linspace(xmin, 0, num=num_xticks)\n", |
727 | | - " ax.set_xticks([abs(xmin) - abs(val) for val in xticks])\n", |
728 | | - " ax.set_xticklabels([f\"{val:.2f}\" if -1 < val < 1 else f\"{val:.0f}\" for val in xticks], rotation=45)\n", |
729 | | - "for i, (vram_hours, pi_acc_job_hours, avg_req_vram_eff_score) in enumerate(\n", |
730 | | - " zip(\n", |
731 | | - " top_pi_accounts[\"pi_acc_vram_hours\"],\n", |
732 | | - " top_pi_accounts[\"pi_acc_job_hours\"],\n", |
733 | | - " col_heights,\n", |
734 | | - " strict=True,\n", |
735 | | - " )\n", |
736 | | - "):\n", |
737 | | - " xpos = min(avg_req_vram_eff_score + xlim * 0.02, xlim * 0.92)\n", |
738 | | - " ax.text(\n", |
739 | | - " xpos,\n", |
740 | | - " i,\n", |
741 | | - " f\"VRAM-Hours: {vram_hours:.2f}\\nJob Hours: {pi_acc_job_hours:.2f}\",\n", |
742 | | - " va=\"center\",\n", |
743 | | - " ha=\"left\",\n", |
744 | | - " fontsize=9,\n", |
745 | | - " color=\"black\",\n", |
746 | | - " clip_on=False,\n", |
747 | | - " )\n", |
748 | | - "\n", |
749 | | - "ax.tick_params(axis='y', which='both', direction='out', length=4, pad=2)\n", |
750 | | - "plt.tight_layout()\n", |
751 | | - "plt.show()\n" |
| 633 | + "pis_with_metrics_visualizer = PIGroupsWithMetricsVisualizer(inefficient_pis_avg_req_vram_eff_score.head(10))\n", |
| 634 | + "pis_with_metrics_visualizer.visualize(\n", |
| 635 | + " output_dir_path=PI_GROUPS_VISUALIZATION_DATA_DIR,\n", |
| 636 | + " column=\"avg_requested_vram_efficiency_score\",\n", |
| 637 | + " bar_label_columns=[\"pi_acc_job_hours\", \"pi_acc_vram_hours\"],\n", |
| 638 | + " figsize=(10, 6),\n", |
| 639 | + ")" |
752 | 640 | ] |
753 | 641 | } |
754 | 642 | ], |
|
0 commit comments