Skip to content

Commit 37be815

Browse files
authored
* Ops fixes: Correct accounting and reporting of cache unpacking statistics. Avoid reflecting SQLAlchemy `init_db` errors into conmon. I realized that even though I'd shortened the traceback, it would still be displayed by Python on termination with an unhandled exception, and that would still be relayed through conmon one line at a time. To avoid that, I added an `except Exception` clause to print a simple message and exit. In the process, I realized that while I'd written a new Click-based `pbench-reindex`, the old `/opt/pbench-server/bin/pbench-reindex` script was still there and referenced in several config files. I removed these.
1 parent 9c898cb commit 37be815

File tree

7 files changed

+59
-297
lines changed

7 files changed

+59
-297
lines changed

.flake8

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ doctests = True
33
filename =
44
*.py
55
*server/bin/pbench-index
6-
*server/bin/pbench-reindex
76
exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build,*web-server/*,
87
*agent/bench-scripts/test-bin/fio-histo-log-pctiles.py,
98
*agent/bench-scripts/tests/pbench-trafficgen/test-39.trafficgen/*,

lib/pbench/cli/server/report.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,11 @@ def report_cache(tree: CacheManager):
176176
lacks_metrics = 0
177177
bad_metrics = 0
178178
unpacked_count = 0
179-
unpacked_times = 0
179+
unpacked_multiple = 0
180180
stream_unpack_skipped = 0
181181
last_ref_errors = 0
182182
agecomp = Comparator("age", really_big=time.time() * 2.0)
183+
unpackcomp = Comparator("unpack")
183184
sizecomp = Comparator("size")
184185
compcomp = Comparator("compression")
185186
speedcomp = Comparator("speed", really_big=GINORMOUS_FP)
@@ -254,7 +255,9 @@ def report_cache(tree: CacheManager):
254255
else:
255256
found_metrics = True
256257
unpacked_count += 1
257-
unpacked_times += metrics["count"]
258+
if metrics["count"] > 1:
259+
unpacked_multiple += 1
260+
unpackcomp.add(dsname, metrics["count"])
258261
speedcomp.add(dsname, metrics["min"], metrics["max"])
259262
if size:
260263
stream_fast = size / metrics["min"] / MEGABYTE_FP
@@ -270,9 +273,14 @@ def report_cache(tree: CacheManager):
270273
f"{humanize.naturalsize(cached_size)}"
271274
)
272275
click.echo(
273-
f" {unpacked_count:,d} datasets have been unpacked a total of "
274-
f"{unpacked_times:,d} times"
276+
f" {unpacked_count:,d} datasets have unpack metrics and "
277+
f"{unpacked_multiple} have been unpacked more than once."
275278
)
279+
if unpackcomp.max > 1 or verifier.verify:
280+
click.echo(
281+
f" The most unpacked dataset has been unpacked {unpackcomp.max} times, "
282+
f"{unpackcomp.max_name}"
283+
)
276284
click.echo(
277285
" The least recently used cache was referenced "
278286
f"{humanize.naturaldate(oldest)}, {agecomp.min_name}"
@@ -318,12 +326,9 @@ def report_cache(tree: CacheManager):
318326
f"{last_ref_errors:,d} are missing reference timestamps, "
319327
f"{bad_size:,d} have bad size metadata"
320328
)
321-
if lacks_metrics or bad_metrics or verifier.verify:
322-
click.echo(
323-
f" {lacks_metrics:,d} datasets are missing unpack metric data, "
324-
f"{bad_metrics} have bad unpack metric data"
325-
)
326-
if lacks_tarpath:
329+
if bad_metrics or verifier.verify:
330+
click.echo(f" {bad_metrics:,d} have bad unpack metric data")
331+
if lacks_tarpath or verifier.verify:
327332
click.echo(
328333
f" {lacks_tarpath} datasets are missing server.tarball-path metadata"
329334
)

lib/pbench/cli/server/shell.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ def main() -> int:
222222

223223
try:
224224
return run_gunicorn(server_config, logger)
225-
except Exception:
226-
logger.exception("Unhandled exception running gunicorn")
225+
except Exception as exc:
226+
logger.exception("Unhandled exception running gunicorn: {!r}", str(exc))
227+
print(exc, file=sys.stderr)
227228
return 1

lib/pbench/server/database/database.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
from pbench.server import PbenchServerConfig
1212

1313

14+
class DatabaseInitError(Exception):
15+
pass
16+
17+
1418
class Database:
1519
# Create declarative base model that our model can inherit from
1620
Base = declarative_base()
@@ -64,33 +68,38 @@ def init_db(server_config: PbenchServerConfig, logger: Logger):
6468
# metadata will not have any tables and create_all functionality will do
6569
# nothing.
6670

67-
engine = create_engine(db_uri)
68-
db_url = urlparse(db_uri)
69-
if db_url.scheme == "sqlite":
70-
Database.Base.metadata.create_all(bind=engine)
71-
else:
72-
alembic_cfg = Config()
73-
alembic_cfg.set_main_option(
74-
"script_location", str(Path(__file__).parent / "alembic")
71+
try:
72+
engine = create_engine(db_uri)
73+
db_url = urlparse(db_uri)
74+
if db_url.scheme == "sqlite":
75+
Database.Base.metadata.create_all(bind=engine)
76+
else:
77+
alembic_cfg = Config()
78+
alembic_cfg.set_main_option(
79+
"script_location", str(Path(__file__).parent / "alembic")
80+
)
81+
alembic_cfg.set_main_option("prepend_sys_path", ".")
82+
with engine.begin() as connection:
83+
alembic_cfg.attributes["connection"] = connection
84+
command.upgrade(alembic_cfg, "head")
85+
Database.db_session = scoped_session(
86+
sessionmaker(bind=engine, autocommit=False, autoflush=False)
87+
)
88+
Database.Base.query = Database.db_session.query_property()
89+
90+
# Although most of the Pbench Server currently works with the default
91+
# SQLAlchemy transaction management, some parts rely on true atomic
92+
# transactions and need a better isolation level.
93+
# NOTE: In PostgreSQL we might use a slightly looser integrity level
94+
# like "REPEATABLE READ", however as this isn't supported in sqlite3
95+
# we're using the strictest "SERIALIZABLE" level.
96+
Database.maker = sessionmaker(
97+
bind=engine.execution_options(isolation_level="SERIALIZABLE")
7598
)
76-
alembic_cfg.set_main_option("prepend_sys_path", ".")
77-
with engine.begin() as connection:
78-
alembic_cfg.attributes["connection"] = connection
79-
command.upgrade(alembic_cfg, "head")
80-
Database.db_session = scoped_session(
81-
sessionmaker(bind=engine, autocommit=False, autoflush=False)
82-
)
83-
Database.Base.query = Database.db_session.query_property()
84-
85-
# Although most of the Pbench Server currently works with the default
86-
# SQLAlchemy transaction management, some parts rely on true atomic
87-
# transactions and need a better isolation level.
88-
# NOTE: In PostgreSQL we might use a slightly looser integrity level
89-
# like "REPEATABLE READ", however as this isn't supported in sqlite3
90-
# we're using the strictest "SERIALIZABLE" level.
91-
Database.maker = sessionmaker(
92-
bind=engine.execution_options(isolation_level="SERIALIZABLE")
93-
)
99+
except Exception as e:
100+
if logger:
101+
logger.error("Unable to initialize database {}: {!r}", db_uri, str(e))
102+
raise DatabaseInitError(str(e))
94103

95104
@staticmethod
96105
def dump_query(query: Query, logger: Logger, level: int = DEBUG):

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ include = '''
4444
| ^/agent/util-scripts/validate-ipaddress$
4545
| ^/agent/util-scripts/pbench-tool-meister-start$
4646
| ^/server/bin/pbench-index$
47-
| ^/server/bin/pbench-reindex$
4847
| ^/utils/fetch-curr-commit-id$
4948
'''
5049
extend-exclude = '''

server/bin/pbench-index

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,10 @@ if __name__ == "__main__":
204204
# exit status to 1. All finally handlers would have been executed, no
205205
# need to report a SIGTERM again.
206206
status = 1
207+
except Exception as e:
208+
# Don't let an unhandled exception generate a potentially massive
209+
# traceback that's unlikely to have much real value (and which will be
210+
# echoed to syslogd by conmon, one line at a time).
211+
print(f"Indexing failed with exception {str(e)!r}", file=sys.stderr)
212+
status = 1
207213
sys.exit(status)

0 commit comments

Comments
 (0)