|
63 | 63 | - GPU utilization charts (if GPU data is available) |
64 | 64 | - GPU memory utilization charts (if GPU data is available) |
65 | 65 |
|
| 66 | + gpu-report |
| 67 | + Generate a GPU usage report for one or more workspaces/projects. |
| 68 | +
|
| 69 | + Usage: |
| 70 | + cometx admin gpu-report WORKSPACE [WORKSPACE ...] --start-date DATE |
| 71 | + cometx admin gpu-report WORKSPACE/PROJECT [WORKSPACE/PROJECT ...] --start-date DATE |
| 72 | +
|
| 73 | + Arguments: |
| 74 | + WORKSPACE_PROJECT (required, one or more) |
| 75 | + One or more WORKSPACE or WORKSPACE/PROJECT to run GPU report for. |
| 76 | + If WORKSPACE is provided without a project, all projects in that workspace will be included. |
| 77 | +
|
| 78 | + Options: |
| 79 | + --start-date DATE |
| 80 | + Start date for the report in YYYY-MM-DD format (required). |
| 81 | +
|
| 82 | + --end-date DATE |
| 83 | + End date for the report in YYYY-MM-DD format (optional). |
| 84 | + If not provided, reports from start-date onwards. |
| 85 | +
|
| 86 | + --metrics METRIC [METRIC ...] |
| 87 | + List of metrics to track (optional). |
| 88 | + If not provided, uses default GPU metrics. |
| 89 | +
|
| 90 | + Output: |
| 91 | + Generates a PDF report with summary statistics and GPU metric charts. |
| 92 | +
|
66 | 93 | Global Options (available for all commands): |
67 | 94 | --api-key KEY |
68 | 95 | Set the COMET_API_KEY for authentication. |
|
83 | 110 | cometx admin usage-report my-workspace/project1 my-workspace/project2 |
84 | 111 | cometx admin usage-report workspace1 workspace2 --units week |
85 | 112 | cometx admin usage-report workspace --units day --no-open |
| 113 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 |
| 114 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 --end-date 2024-12-31 |
| 115 | + cometx admin gpu-report workspace1/project1 workspace2 --start-date 2024-01-01 --metrics sys.gpu.0.gpu_utilization |
86 | 116 |
|
87 | 117 | """ |
88 | 118 |
|
|
93 | 123 |
|
94 | 124 | from comet_ml import API |
95 | 125 |
|
| 126 | +from .admin_gpu_report import main as gpu_report_main |
96 | 127 | from .admin_usage_report import generate_usage_report |
97 | 128 |
|
98 | 129 | ADDITIONAL_ARGS = False |
@@ -234,6 +265,88 @@ def get_parser_arguments(parser): |
234 | 265 | type=str, |
235 | 266 | ) |
236 | 267 |
|
| 268 | + # gpu-report subcommand |
| 269 | + gpu_report_description = """Generate a GPU usage report for one or more workspaces/projects. |
| 270 | +
|
| 271 | +Arguments: |
| 272 | + WORKSPACE_PROJECT (required, one or more) |
| 273 | + One or more WORKSPACE or WORKSPACE/PROJECT to run GPU report for. |
| 274 | + If WORKSPACE is provided without a project, all projects in that workspace will be included. |
| 275 | +
|
| 276 | +Options: |
| 277 | + --start-date DATE |
| 278 | + Start date for the report in YYYY-MM-DD format (required). |
| 279 | +
|
| 280 | + --end-date DATE |
| 281 | + End date for the report in YYYY-MM-DD format (optional). |
| 282 | + If not provided, reports from start-date onwards. |
| 283 | +
|
| 284 | + --metrics METRIC [METRIC ...] |
| 285 | + List of metrics to track (optional). |
| 286 | + If not provided, uses default GPU metrics: |
| 287 | + - sys.gpu.0.gpu_utilization |
| 288 | + - sys.gpu.0.memory_utilization |
| 289 | + - sys.gpu.0.used_memory |
| 290 | + - sys.gpu.0.power_usage |
| 291 | + - sys.gpu.0.temperature |
| 292 | +
|
| 293 | + --open |
| 294 | + Automatically open the generated PDF file after generation. |
| 295 | +
|
| 296 | +Output: |
| 297 | + Generates a PDF report containing: |
| 298 | + - Summary statistics (total experiments, workspaces, metrics tracked) |
| 299 | + - Average metrics by workspace charts |
| 300 | + - Maximum metrics by month charts |
| 301 | +
|
| 302 | +Examples: |
| 303 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 |
| 304 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 --end-date 2024-12-31 |
| 305 | + cometx admin gpu-report workspace1/project1 workspace2 --start-date 2024-01-01 |
| 306 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 --metrics sys.gpu.0.gpu_utilization sys.gpu.0.memory_utilization |
| 307 | + cometx admin gpu-report my-workspace --start-date 2024-01-01 --open |
| 308 | +""" |
| 309 | + gpu_parser = subparsers.add_parser( |
| 310 | + "gpu-report", |
| 311 | + help="Generate a GPU usage report for one or more workspaces/projects", |
| 312 | + description=gpu_report_description, |
| 313 | + formatter_class=argparse.RawDescriptionHelpFormatter, |
| 314 | + ) |
| 315 | + # Add global arguments to subparser so they show in help |
| 316 | + add_global_arguments(gpu_parser) |
| 317 | + gpu_parser.add_argument( |
| 318 | + "WORKSPACE_PROJECT", |
| 319 | + nargs="+", |
| 320 | + help="One or more WORKSPACE or WORKSPACE/PROJECT to run GPU report for", |
| 321 | + metavar="WORKSPACE", |
| 322 | + type=str, |
| 323 | + ) |
| 324 | + gpu_parser.add_argument( |
| 325 | + "--start-date", |
| 326 | + help="Start date for the report in YYYY-MM-DD format (required)", |
| 327 | + type=str, |
| 328 | + required=True, |
| 329 | + ) |
| 330 | + gpu_parser.add_argument( |
| 331 | + "--end-date", |
| 332 | + help="End date for the report in YYYY-MM-DD format (optional)", |
| 333 | + type=str, |
| 334 | + default=None, |
| 335 | + ) |
| 336 | + gpu_parser.add_argument( |
| 337 | + "--metrics", |
| 338 | + help="List of metrics to track (optional, uses defaults if not provided)", |
| 339 | + nargs="+", |
| 340 | + type=str, |
| 341 | + default=None, |
| 342 | + ) |
| 343 | + gpu_parser.add_argument( |
| 344 | + "--open", |
| 345 | + help="Automatically open the generated PDF file after generation", |
| 346 | + default=False, |
| 347 | + action="store_true", |
| 348 | + ) |
| 349 | + |
237 | 350 |
|
238 | 351 | def admin(parsed_args, remaining=None): |
239 | 352 | # Called via `cometx admin ...` |
@@ -326,6 +439,45 @@ def admin(parsed_args, remaining=None): |
326 | 439 | except Exception as e: |
327 | 440 | print("ERROR: " + str(e)) |
328 | 441 | return |
| 442 | + elif parsed_args.ACTION == "gpu-report": |
| 443 | + workspace_projects = parsed_args.WORKSPACE_PROJECT |
| 444 | + start_date = parsed_args.start_date |
| 445 | + end_date = parsed_args.end_date |
| 446 | + metrics = parsed_args.metrics |
| 447 | + |
| 448 | + try: |
| 449 | + result = gpu_report_main( |
| 450 | + workspace_projects=workspace_projects, |
| 451 | + start_date=start_date, |
| 452 | + end_date=end_date, |
| 453 | + metrics=metrics, |
| 454 | + max_workers=None, # Use default |
| 455 | + ) |
| 456 | + if result: |
| 457 | + num_experiments = len(result.get("metrics", {})) |
| 458 | + num_charts = len(result.get("charts", [])) |
| 459 | + pdf_file = result.get("pdf_file") |
| 460 | + print( |
| 461 | + f"\nGPU report completed. Processed {num_experiments} experiments." |
| 462 | + ) |
| 463 | + if num_charts > 0: |
| 464 | + print(f"Generated {num_charts} charts:") |
| 465 | + for chart_file in result.get("charts", []): |
| 466 | + print(f" - {chart_file}") |
| 467 | + if pdf_file: |
| 468 | + print(f"PDF report: {pdf_file}") |
| 469 | + # Open PDF if --open flag is set |
| 470 | + if parsed_args.open: |
| 471 | + from .admin_gpu_report import open_pdf |
| 472 | + |
| 473 | + open_pdf(pdf_file, debug=parsed_args.debug) |
| 474 | + except Exception as e: |
| 475 | + print("ERROR: " + str(e)) |
| 476 | + if parsed_args.debug: |
| 477 | + import traceback |
| 478 | + |
| 479 | + traceback.print_exc() |
| 480 | + return |
329 | 481 |
|
330 | 482 | except KeyboardInterrupt: |
331 | 483 | if parsed_args.debug: |
|
0 commit comments