|
5 | 5 | import re |
6 | 6 | import shutil |
7 | 7 | import time |
8 | | -from typing import Optional, Union |
| 8 | +from typing import Any, Iterator, Optional, Union |
9 | 9 |
|
10 | 10 | import click |
11 | 11 | import humanize |
12 | | -from sqlalchemy import inspect, select, text |
| 12 | +from sqlalchemy import cast, inspect, Row, select, text |
13 | 13 |
|
14 | 14 | from pbench.cli import pass_cli_context |
15 | 15 | from pbench.cli.server import config_setup, Detail, Verify, Watch |
|
18 | 18 | from pbench.server import BadConfig |
19 | 19 | from pbench.server.cache_manager import CacheManager |
20 | 20 | from pbench.server.database.database import Database |
| 21 | +from pbench.server.database.models import TZDateTime |
21 | 22 | from pbench.server.database.models.audit import Audit, AuditStatus |
22 | 23 | from pbench.server.database.models.datasets import Dataset, Metadata |
23 | 24 | from pbench.server.database.models.index_map import IndexMap |
|
34 | 35 | # SQL "chunk size" |
35 | 36 | SQL_CHUNK = 2000 |
36 | 37 |
|
# Abbreviated month names indexed by datetime.datetime.month (1 - 12). The
# entry at index 0 is only a placeholder so that month numbers can be used
# directly as indices.
MONTHS = (
    "00",
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec",
)

# Abbreviated day names indexed by datetime.datetime.weekday() (0 - 6).
DAYS_OF_WEEK = (
    "Mon", "Tue", "Wed", "Thu", "Fri",
    "Sat", "Sun",
)
| 57 | + |
37 | 58 | detailer: Optional[Detail] = None |
38 | 59 | watcher: Optional[Watch] = None |
39 | 60 | verifier: Optional[Verify] = None |
@@ -308,6 +329,126 @@ def report_cache(tree: CacheManager): |
308 | 329 | ) |
309 | 330 |
|
310 | 331 |
|
def columnize(
    items: dict[int, int],
    width: int = 80,
    ifmt: str = "4d",
    cfmt: str = ">8,d",
    lookup: Optional[Union[list[str], tuple[str, ...]]] = None,
):
    """Combine multiple outputs across a line to minimize vertical space

    Each item is rendered as "key: value" and appended to the current output
    line. When appending another entry would reach the requested width, the
    current line is written with click.echo and a new line is started.
    Entries are written in ascending key order.

    Args:
        items: dictionary of items to report as "key: value"
        width: width of line to fill (default 80)
        ifmt: format string for key
        cfmt: format string for value
        lookup: sequence of string values to represent key, indexed by the
            key; when omitted (or empty) the key itself is formatted
    """
    line = ""
    for item, count in sorted(items.items()):
        try:
            k = lookup[item] if lookup else item
        except (LookupError, TypeError) as e:
            # The key can't be translated: report the problem on stderr and
            # fall back to showing the raw key rather than dropping the entry.
            click.echo(f"{item} from {lookup}: {str(e)!r}", err=True)
            k = str(item)
        add = f" {k:{ifmt}}: {count:{cfmt}}"
        # Only flush when there is something to flush: if a single entry is
        # wider than the line, don't emit a blank line ahead of it.
        if line and len(line) + len(add) >= width:
            click.echo(line)
            line = add
        else:
            line += add
    if line:
        click.echo(line)
| 363 | + |
| 364 | + |
def summarize_dates(rows: Iterator[Row], width: int = 80):
    """Report a statistical summary of a stream of timestamps

    Counts how many timestamps fall within the current year, the current
    month, the trailing seven days, and the current day, and then reports
    totals bucketed by year, month of year, day of month, day of week, and
    hour of day.

    The time windows are computed from midnight UTC of the current day. The
    thresholds are timezone-aware, so the timestamps are presumably expected
    to be timezone-aware as well.

    Rows whose first column is not a datetime are reported through the
    detailer and otherwise ignored.

    Args:
        rows: rows with the timestamp to be summarized in the first column
        width: width of line to fill (default 80)
    """
    by_year = defaultdict(int)
    by_month = defaultdict(int)
    by_day = defaultdict(int)
    by_weekday = defaultdict(int)
    by_hour = defaultdict(int)

    # Window thresholds: start of today, of this month, of this year, and
    # seven days before the start of today.
    day = datetime.datetime.now(datetime.timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    month = day.replace(day=1)
    year = month.replace(month=1)
    week = day - datetime.timedelta(days=7)

    this_year = 0
    this_month = 0
    this_week = 0
    this_day = 0

    for row in rows:
        date: datetime.datetime = row[0]
        if not isinstance(date, datetime.datetime):
            detailer.message(f"Got non-datetime row {row}")
            continue
        by_year[date.year] += 1
        by_month[date.month] += 1
        by_day[date.day] += 1
        by_weekday[date.weekday()] += 1
        by_hour[date.hour] += 1

        # Test each window independently. The trailing seven day window can
        # begin in the previous month (or year), so it must not depend on
        # the month or year tests.
        if date >= year:
            this_year += 1
        if date >= month:
            this_month += 1
        if date >= week:
            this_week += 1
        if date >= day:
            this_day += 1

    click.echo(f" {this_year:,d} this year ({year:%Y})")
    click.echo(f" {this_month:,d} this month ({month:%B %Y})")
    click.echo(f" {this_week:,d} this week ({week:%B %d} to {day:%B %d})")
    click.echo(f" {this_day:,d} today ({day:%d %B %Y})")

    click.echo(" Total by year:")
    columnize(by_year, width)
    click.echo(" Total by month of year:")
    columnize(by_month, width, ifmt="s", lookup=MONTHS)
    click.echo(" Total by day of month:")
    columnize(by_day, width, ifmt="02d")
    click.echo(" Total by day of week:")
    columnize(by_weekday, width, ifmt="s", lookup=DAYS_OF_WEEK)
    click.echo(" Total by hour of day:")
    columnize(by_hour, width, ifmt="02d")
| 419 | + |
| 420 | + |
def report_creation(options: dict[str, Any]):
    """Summarize datasets according to their creation timestamps

    The timestamp is the "pbench" / "date" value within each "metalog"
    metadata row, cast from its string form to a TZDateTime. Rows are
    streamed from the database in chunks of SQL_CHUNK.

    Args:
        options: the click options; "width" gives the output line width
    """
    watcher.update("analyzing upload patterns")

    # Select the dataset creation date out of the "metalog" metadata value.
    created = cast(Metadata.value["pbench", "date"].as_string(), TZDateTime)
    query = Database.db_session.query(created).filter(Metadata.key == "metalog")

    # Stream the results rather than loading every row at once.
    rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

    click.echo("Dataset statistics by creation date:")
    summarize_dates(rows, options.get("width"))
| 436 | + |
| 437 | + |
def report_uploads(options: dict[str, Any]):
    """Summarize datasets according to their upload timestamps

    The timestamp is the Dataset.uploaded column. Rows are streamed from the
    database in chunks of SQL_CHUNK.

    Args:
        options: the click options; "width" gives the output line width
    """
    watcher.update("analyzing upload patterns")

    query = Database.db_session.query(Dataset.uploaded)

    # Stream the results rather than loading every row at once.
    rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

    click.echo("Dataset statistics by upload date:")
    summarize_dates(rows, options.get("width"))
| 450 | + |
| 451 | + |
311 | 452 | def report_audit(): |
312 | 453 | """Report audit log statistics.""" |
313 | 454 |
|
@@ -521,78 +662,70 @@ def report_states(): |
521 | 662 | @click.option( |
522 | 663 | "--states", "-S", default=False, is_flag=True, help="Display operational states" |
523 | 664 | ) |
| 665 | +@click.option( |
| 666 | + "--statistics", |
| 667 | + type=click.Choice(["creation", "upload"], case_sensitive=False), |
| 668 | + help="Show upload statistics", |
| 669 | +) |
524 | 670 | @click.option( |
525 | 671 | "--verify", "-v", default=False, is_flag=True, help="Display intermediate messages" |
526 | 672 | ) |
| 673 | +@click.option("--width", type=int, default=80, help="Set output width") |
527 | 674 | @common_options |
528 | | -def report( |
529 | | - context: object, |
530 | | - all: bool, |
531 | | - archive: bool, |
532 | | - audit: bool, |
533 | | - backup: bool, |
534 | | - cache: bool, |
535 | | - detail: bool, |
536 | | - errors: bool, |
537 | | - progress: float, |
538 | | - sql: bool, |
539 | | - states: bool, |
540 | | - verify: bool, |
541 | | -): |
| 675 | +def report(context: object, **kwargs): |
542 | 676 | """ |
543 | 677 | Report statistics and problems in the SQL and on-disk representation of |
544 | 678 | Pbench datasets. |
545 | 679 | \f |
546 | 680 |
|
547 | 681 | Args: |
548 | 682 | context: click context |
549 | | - all: report all statistics |
550 | | - archive: report archive statistics |
551 | | - audit: report audit log statistics |
552 | | - backup: report backup statistics |
553 | | - cache: report cache statistics |
554 | | - detail: provide additional per-file diagnostics |
555 | | - errors: show individual file errors |
556 | | - sql: report SQL statistics |
557 | | - states: report operational states |
558 | | - verify: Report internal status |
| 683 | + kwargs: click options |
559 | 684 | """ |
560 | 685 | logger = None |
561 | 686 |
|
562 | 687 | global detailer, verifier, watcher |
563 | | - detailer = Detail(detail, errors) |
564 | | - verifier = Verify(verify) |
565 | | - watcher = Watch(progress) |
| 688 | + detailer = Detail(kwargs.get("detail"), kwargs.get("errors")) |
| 689 | + verifier = Verify(kwargs.get("verify")) |
| 690 | + watcher = Watch(kwargs.get("progress")) |
| 691 | + rv = 0 |
566 | 692 |
|
567 | 693 | try: |
568 | 694 | config = config_setup(context) |
569 | 695 | logger = get_pbench_logger("pbench-report-generator", config) |
570 | 696 | cache_m = CacheManager(config, logger) |
571 | | - if any((all, archive, backup)): |
| 697 | + if any((kwargs.get("all"), kwargs.get("archive"), kwargs.get("backup"))): |
572 | 698 | verifier.status("starting discovery") |
573 | 699 | watcher.update("discovering archive tree") |
574 | 700 | cache_m.full_discovery(search=False) |
575 | 701 | watcher.update("processing reports") |
576 | 702 | verifier.status("finished discovery") |
577 | | - if all or archive: |
| 703 | + if kwargs.get("all") or kwargs.get("archive"): |
578 | 704 | report_archive(cache_m) |
579 | | - if all or backup: |
| 705 | + if kwargs.get("all") or kwargs.get("backup"): |
580 | 706 | report_backup(cache_m) |
581 | | - if all or cache: |
| 707 | + if kwargs.get("all") or kwargs.get("cache"): |
582 | 708 | report_cache(cache_m) |
583 | | - if all or audit: |
| 709 | + stats = kwargs.get("statistics") |
| 710 | + if stats: |
| 711 | + if stats == "creation": |
| 712 | + report_creation(kwargs) |
| 713 | + elif stats == "upload": |
| 714 | + report_uploads(kwargs) |
| 715 | + else: |
| 716 | + click.echo(f"Unexpected statistics option {stats}", err=True) |
| 717 | + rv = 1 |
| 718 | + if kwargs.get("all") or kwargs.get("audit"): |
584 | 719 | report_audit() |
585 | | - if all or sql: |
| 720 | + if kwargs.get("all") or kwargs.get("sql"): |
586 | 721 | report_sql() |
587 | | - if all or states: |
| 722 | + if kwargs.get("all") or kwargs.get("states"): |
588 | 723 | report_states() |
589 | 724 | watcher.update("done") |
590 | | - |
591 | | - rv = 0 |
592 | 725 | except Exception as exc: |
593 | 726 | if logger: |
594 | 727 | logger.exception("An error occurred discovering the file tree: {}", exc) |
595 | | - if verify: |
| 728 | + if kwargs.get("verify"): |
596 | 729 | raise |
597 | 730 | click.secho(exc, err=True, bg="red") |
598 | 731 | rv = 2 if isinstance(exc, BadConfig) else 1 |
|
0 commit comments