Skip to content

Commit 4a35b7e

Browse files
authored
An assortment of Pbench Ops fixes and fun (distributed-system-analysis#3612)
* An assortment of Pbench Ops fixes and fun This fixes several issues observed during ops review: 1. The `/api/v1/endpoints` API fails if the server is shut down 2. `tar` unpack errors can result in enormous `stderr` output, which is captured in the `Audit` log; truncate it to 5Mb 3. Change the `pbench-audit` utility to use `dateutil.parser` instead of `click.DateTime()` so we can include fractional seconds and timezone. During the time when we broke PostgreSQL, we failed to create metadata for a number of datasets that were allowed to upload. (Whether we should allow this vs failing the upload is a separate issue.) We also want to repair the excessively large `Audit` attributes records. So I took a stab at some wondrous and magical SQL queries and hackery to begin a new `pbench-repair` utility. Right now, it repairs long audit attributes "intelligently" by trimming individual JSON key values; and it adds metadata to datasets which lack critical values. Currently, this includes `server.tarball-path` (which we need to enable TOC and visualization), `dataset.metalog` (capturing the tarball `metadata.log` file), and `server.benchmark` for visualization. There are other `server` namespace values (including expiration time) that could be repaired: I decided not to worry about that as we're not doing expiration anyway. (Though I might add it over the weekend, since it shouldn't be hard.) And there are probably other things we might want to repair in the future using this framework. I tested this in a `runlocal` container, using `psql` to "break" datasets and repair them. 
I hacked the local `repair.py` with a low "max error" limit to force truncation of audit attributes: ``` pbench-repair --detail --errors --verify --progress 10 (22:52:08) Repairing audit || 60:FAILURE upload fio_rw_2018.02.01T22.40.57 [message] truncated (107) to 105 || 116:SUCCESS apikey None [key] truncated (197) to 105 22 audit records had attributes too long 2 records were fixed (22:52:08) Repairing metadata || fio_rw_2018.02.01T22.40.57 has no server.tarball-path: setting /srv/pbench/archive/fs-version-001/dhcp31-45.perf.lab.eng.bos.redhat.com/08516cc7448035be2cc502f0517783fa/fio_rw_2018.02.01T22.40.57.tar.xz || fio_rw_2018.02.01T22.40.57 has no metalog: setting from metadata.log || fio_rw_2018.02.01T22.40.57 has no server.benchmark: setting 'fio' || pbench-user-benchmark_example-vmstat_2018.10.24T14.38.18 has no server.tarball-path: setting /srv/pbench/archive/fs-version-001/ansible-host/45f0e2af41977b89e07bae4303dc9972/pbench-user-benchmark_example-vmstat_2018.10.24T14.38.18.tar.xz || pbench-user-benchmark_example-vmstat_2018.10.24T14.38.18 has no metalog: setting from metadata.log || pbench-user-benchmark_example-vmstat_2018.10.24T14.38.18 has no server.benchmark: setting 'pbench-user-benchmark' 2 server.tarball-path repairs, 0 failures 2 dataset.metalog repairs, 0 failures 2 server.benchmark repairs ```
1 parent bf2792f commit 4a35b7e

File tree

13 files changed

+473
-66
lines changed

13 files changed

+473
-66
lines changed

lib/pbench/cli/server/__init__.py

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22
from threading import Thread
33
import time
4-
from typing import Any, Optional
4+
from typing import Any, Optional, Union
55

66
import click
77
from click import Context, Parameter, ParamType
@@ -11,6 +11,27 @@
1111
from pbench.server.database import init_db
1212

1313

14+
class DateParser(ParamType):
15+
"""The DateParser type converts date strings into `datetime` objects.
16+
17+
This is a variant of click's built-in DateTime parser, but uses the
18+
more flexible dateutil.parser
19+
"""
20+
21+
name = "dateparser"
22+
23+
def convert(
24+
self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
25+
) -> Any:
26+
if isinstance(value, datetime.datetime):
27+
return value
28+
29+
try:
30+
return parser.parse(value)
31+
except Exception as e:
32+
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
33+
34+
1435
class Detail:
1536
"""Encapsulate generation of additional diagnostics"""
1637

@@ -63,29 +84,32 @@ def warning(self, message: str):
6384
class Verify:
6485
"""Encapsulate -v status messages."""
6586

66-
def __init__(self, verify: bool):
87+
def __init__(self, verify: Union[bool, int]):
6788
"""Initialize the object.
6889
6990
Args:
7091
verify: True to write status messages.
7192
"""
72-
self.verify = verify
93+
if isinstance(verify, int):
94+
self.verify = verify
95+
else:
96+
self.verify = 1 if verify else 0
7397

7498
def __bool__(self) -> bool:
7599
"""Report whether verification is enabled.
76100
77101
Returns:
78102
True if verification is enabled.
79103
"""
80-
return self.verify
104+
return bool(self.verify)
81105

82-
def status(self, message: str):
106+
def status(self, message: str, level: int = 1):
83107
"""Write a message if verification is enabled.
84108
85109
Args:
86110
message: status string
87111
"""
88-
if self.verify:
112+
if self.verify >= level:
89113
ts = datetime.datetime.now().astimezone()
90114
click.secho(f"({ts:%H:%M:%S}) {message}", fg="green", err=True)
91115

@@ -138,27 +162,6 @@ def watcher(self):
138162
)
139163

140164

141-
class DateParser(ParamType):
142-
"""The DateParser type converts date strings into `datetime` objects.
143-
144-
This is a variant of click's built-in DateTime parser, but uses the
145-
more flexible dateutil.parser
146-
"""
147-
148-
name = "dateparser"
149-
150-
def convert(
151-
self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
152-
) -> Any:
153-
if isinstance(value, datetime.datetime):
154-
return value
155-
156-
try:
157-
return parser.parse(value)
158-
except Exception as e:
159-
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
160-
161-
162165
def config_setup(context: object) -> PbenchServerConfig:
163166
config = PbenchServerConfig.create(context.config)
164167
# We're going to need the DB to track dataset state, so setup DB access.

lib/pbench/cli/server/audit.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import click
66

77
from pbench.cli import pass_cli_context
8-
from pbench.cli.server import config_setup, Verify
8+
from pbench.cli.server import config_setup, DateParser, Verify
99
from pbench.cli.server.options import common_options
1010
from pbench.server import BadConfig, OperationCode
1111
from pbench.server.database.database import Database
@@ -110,6 +110,7 @@ def auditor(kwargs) -> Iterator[str]:
110110

111111
@click.command(name="pbench-audit")
112112
@pass_cli_context
113+
@click.option("--id", type=int, help="Select by audit event ID")
113114
@click.option(
114115
"--ids",
115116
default=False,
@@ -130,6 +131,7 @@ def auditor(kwargs) -> Iterator[str]:
130131
@click.option("--object-id", type=str, help="Select by object ID")
131132
@click.option("--object-name", type=str, help="Select by object name")
132133
@click.option("--page", default=False, is_flag=True, help="Paginate the output")
134+
@click.option("--root-id", type=int, help="Select by audit event root ID")
133135
@click.option("--user-id", type=str, help="Select by user ID")
134136
@click.option("--user-name", type=str, help="Select by username")
135137
@click.option(
@@ -142,12 +144,12 @@ def auditor(kwargs) -> Iterator[str]:
142144
)
143145
@click.option(
144146
"--since",
145-
type=click.DateTime(),
147+
type=DateParser(),
146148
help="Select entries on or after specified date/time",
147149
)
148150
@click.option(
149151
"--until",
150-
type=click.DateTime(),
152+
type=DateParser(),
151153
help="Select entries on or before specified date/time",
152154
)
153155
@click.option(

0 commit comments

Comments
 (0)