def test_to_prometheus_format() -> None:
    """Check the exposition lines produced for two metrics and two label sets."""
    help_texts = {
        'metric1': 'This is foo',
        'metric2': 'In a parallel universe this is foo',
    }
    # Each sample pairs a pre-rendered label string with its per-metric values.
    samples = [
        ('foo="bar",baz="2"', {'metric1': 1, 'metric2': 2}),
        ('foo="blubb",baz="3"', {'metric1': 2, 'metric2': 3}),
    ]

    # Metrics are emitted in dict order; every metric gets its HELP and TYPE
    # header followed by one line per sample.
    expected = [
        '# HELP metric1 This is foo.',
        '# TYPE metric1 gauge',
        'metric1{foo="bar",baz="2"} 1',
        'metric1{foo="blubb",baz="3"} 2',
        '# HELP metric2 In a parallel universe this is foo.',
        '# TYPE metric2 gauge',
        'metric2{foo="bar",baz="2"} 2',
        'metric2{foo="blubb",baz="3"} 3',
    ]

    assert reporting.to_prometheus_format(help_texts, samples) == expected
def to_prometheus_labels(labels: typing.Mapping[str, object]) -> str:
    """Render *labels* as the comma-separated inside of a Prometheus label set.

    Each value is converted with ``str()`` and escaped as the Prometheus text
    exposition format requires for label values: backslash, double quote and
    line feed become ``\\\\``, ``\\"`` and ``\\n``.  The surrounding braces are
    not included; an empty mapping yields an empty string.
    """
    rendered = []
    for name, value in labels.items():
        escaped = (
            str(value)
            # Backslashes first, so the escapes added below are not doubled.
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
        )
        rendered.append(f'{name}="{escaped}"')
    return ','.join(rendered)


def to_prometheus_format(metrics: typing.Dict[str, str], prom_stati: typing.Sequence[typing.Tuple[str, typing.Mapping[str, typing.Optional[int]]]]) -> typing.List[str]:
    """Build the Prometheus text exposition lines for a set of gauge metrics.

    ``metrics`` maps each metric name to its help text (a trailing period is
    appended to it).  ``prom_stati`` is a sequence of ``(label_str, values)``
    pairs, where ``label_str`` is an already rendered label set without braces
    and ``values`` maps metric names to the sample value for those labels.

    For every metric, in ``metrics`` order, a ``# HELP`` line and a ``# TYPE``
    line are emitted, followed by one sample line per entry of ``prom_stati``.
    Samples whose value is ``None`` are left out, because ``None`` is not a
    valid sample value and would make the whole output unparseable.

    Raises ``KeyError`` if a ``values`` mapping lacks one of the metric names.
    """
    prom_str_list = []
    for metric_name, metric_desc in metrics.items():
        prom_str_list.append(f'# HELP {metric_name} {metric_desc}.')
        prom_str_list.append(f'# TYPE {metric_name} gauge')
        for label_str, values in prom_stati:
            value = values[metric_name]
            if value is None:
                # Measurement not available for this job: emit no sample.
                continue
            prom_str_list.append('%s{%s} %s' % (metric_name, label_str, value))
    return prom_str_list


def prometheus_report(jobs: typing.List['job.Job'], tmp_prefix: str = '', dst_prefix: str = '') -> str:
    """Create a Prometheus text-format report with one sample per job and metric.

    Every job is labelled with the first eight characters of its plot id, its
    tmp and dst directories (abbreviated with ``abbr_path`` using
    ``tmp_prefix`` / ``dst_prefix``), its run status and its phase.  The lines
    are joined with newlines; no trailing newline is added.
    """
    metrics = {
        'plotman_plot_phase_major': 'The phase the plot is currently in',
        'plotman_plot_phase_minor': 'The part of the phase the plot is currently in',
        'plotman_plot_tmp_usage': 'Tmp dir usage in bytes',
        'plotman_plot_mem_usage': 'Memory usage in bytes',
        'plotman_plot_user_time': 'Processor time (user) in s',
        'plotman_plot_sys_time': 'Processor time (sys) in s',
        'plotman_plot_iowait_time': 'Processor time (iowait) in s',
    }
    prom_stati = []
    for j in jobs:
        # Query the phase once so the label and both phase metrics agree.
        phase = j.progress()
        labels = {
            'plot_id': j.plot_id[:8],
            'tmp_dir': abbr_path(j.tmpdir, tmp_prefix),
            'dst_dir': abbr_path(j.dstdir, dst_prefix),
            'run_status': j.get_run_status(),
            'phase': str(phase),
        }
        values = {
            'plotman_plot_phase_major': phase.major,
            'plotman_plot_phase_minor': phase.minor,
            'plotman_plot_tmp_usage': j.get_tmp_usage(),
            'plotman_plot_mem_usage': j.get_mem_usage(),
            'plotman_plot_user_time': j.get_time_user(),
            'plotman_plot_sys_time': j.get_time_sys(),
            'plotman_plot_iowait_time': j.get_time_iowait(),
        }
        prom_stati.append((to_prometheus_labels(labels), values))
    return '\n'.join(to_prometheus_format(metrics, prom_stati))