|
10 | 10 | import warnings |
11 | 11 | from pathlib import Path |
12 | 12 | from threading import Thread |
13 | | -from typing import Any, Callable, Dict, List, Mapping, NamedTuple, Optional, Union |
| 13 | +from typing import ( |
| 14 | + Any, |
| 15 | + Callable, |
| 16 | + Dict, |
| 17 | + Iterable, |
| 18 | + List, |
| 19 | + Mapping, |
| 20 | + NamedTuple, |
| 21 | + Optional, |
| 22 | + Union, |
| 23 | +) |
14 | 24 |
|
15 | 25 | import numpy |
16 | 26 | import pandas as pd |
@@ -80,10 +90,12 @@ def persist(self, df: pd.DataFrame): |
80 | 90 | ... |
81 | 91 |
|
@abc.abstractmethod
def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
    """
    Count the jobs in this database, grouped by status.

    :param statuses: Iterable (e.g. list or set) of status names to restrict
        the count to. When empty (the default), all statuses are included.

    :return: dictionary with status as key and the number of jobs as value.
    """
    ...
@@ -355,12 +367,18 @@ def start_job_thread(self, start_job: Callable[[], BatchJob], job_db: JobDatabas |
355 | 367 |
|
356 | 368 | self._stop_thread = False |
357 | 369 | def run_loop(): |
| 370 | + |
| 371 | + # TODO: support user-provided `stats` |
| 372 | + stats = collections.defaultdict(int) |
| 373 | + |
358 | 374 | while ( |
359 | 375 | sum(job_db.count_by_status(statuses=["not_started", "created", "queued", "running"]).values()) > 0 |
360 | 376 | and not self._stop_thread |
361 | 377 | ): |
362 | 378 | self._job_update_loop(job_db=job_db, start_job=start_job) |
| 379 | + stats["run_jobs loop"] += 1 |
363 | 380 |
|
| 381 | + _log.info(f"Job status histogram: {job_db.count_by_status()}. Run stats: {dict(stats)}") |
364 | 382 | # Do sequence of micro-sleeps to allow for quick thread exit |
365 | 383 | for _ in range(int(max(1, self.poll_sleep))): |
366 | 384 | time.sleep(1) |
@@ -479,11 +497,15 @@ def run_jobs( |
479 | 497 | # TODO: start showing deprecation warnings for this usage pattern? |
480 | 498 | job_db.initialize_from_df(df) |
481 | 499 |
|
| 500 | + # TODO: support user-provided `stats` |
482 | 501 | stats = collections.defaultdict(int) |
| 502 | + |
483 | 503 | while sum(job_db.count_by_status(statuses=["not_started", "created", "queued", "running"]).values()) > 0: |
484 | 504 | self._job_update_loop(job_db=job_db, start_job=start_job, stats=stats) |
485 | 505 | stats["run_jobs loop"] += 1 |
486 | 506 |
|
| 507 | + # Show current stats and sleep |
| 508 | + _log.info(f"Job status histogram: {job_db.count_by_status()}. Run stats: {dict(stats)}") |
487 | 509 | time.sleep(self.poll_sleep) |
488 | 510 | stats["sleep"] += 1 |
489 | 511 |
|
@@ -791,9 +813,12 @@ def df(self) -> pd.DataFrame: |
791 | 813 | self._df = self.read() |
792 | 814 | return self._df |
793 | 815 |
|
def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
    """
    Retrieve the number of jobs per status from the job dataframe.

    :param statuses: Status name(s) to include. A single non-empty string is
        treated as one status name, not as an iterable of characters.
        If empty, all statuses are included.

    :return: dictionary with status as key and the count as value.
    """
    status_histogram = self.df.groupby("status").size().to_dict()
    if isinstance(statuses, str) and statuses:
        # Guard: `set("running")` would explode the name into single characters
        # and silently filter out every status.
        wanted = {statuses}
    else:
        wanted = set(statuses)
    if wanted:
        status_histogram = {k: v for k, v in status_histogram.items() if k in wanted}
    return status_histogram
797 | 822 |
|
798 | 823 | def get_by_status(self, statuses, max=None) -> pd.DataFrame: |
799 | 824 | """ |
|
0 commit comments