Skip to content

Commit 292918c

Browse files
committed
Build state tracker
Define a SQLAlchemy DB class "Dataset" which defines persistent state for a Pbench dataset including, most importantly, the dataset's username (which is not otherwise recorded until we index) and the dataset's current state, so that we can track the progress of a dataset through the Pbench server pipeline. We also support Metadata associated with each Dataset, describing additional information about datasets beyond the "state". For example, the backup component will mark a dataset as "ARCHIVED" and pbench-reindex marks the selected datasets as "REINDEX". A new dataset starts in UPLOADING state and will progress through the steps as we perform operations on it. We're defining both "in progress" -ing steps such as INDEXING as well as completion steps (which are ready for the next operation) such as INDEXED. There are also two "terminal" states, EXPIRED and QUARANTINED, from which a dataset cannot exit. NOTE that the SQLAlchemy infrastructure borrows heavily from Nikhil's work on the user model; however, I tried to improve initialization. I'm specifying our own database, which we expect to be part of the Postgres URI in the config file. My assumption and intent is that this name will resemble the ES index prefix (although Postgres doesn't like '-' characters); so, for example, with an index prefix of "pbench-drb" I'd choose a DB name like "pbench_drb". This allows multiple Pbench server instances to coexist on a single Postgres server. I also added some logic to create the DB if necessary and to create the DB tables before starting gunicorn ... as Postgres can have problems when multiple clients try to do this at the same time.
1 parent bded8bc commit 292918c

File tree

116 files changed

+1825
-12
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1825
-12
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ build
77
dist
88
*~
99
????-*.patch
10+
.env
1011
.npmrc
1112
.yarnrc
1213
*.egg-info

jenkins/development.Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ RUN \
9797
rsync \
9898
screen \
9999
sos \
100+
sqlite \
100101
tar \
101102
xz \
102103
&& \

lib/pbench/cli/server/shell.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
import sys
55

66
from configparser import NoSectionError, NoOptionError
7+
from sqlalchemy_utils import database_exists, create_database
78

89
from pbench.common.exceptions import BadConfig, ConfigFileNotSpecified
910
from pbench.server.api import create_app, get_server_config
11+
from pbench.server.db.database import Database
12+
from pbench.common.logger import get_pbench_logger
1013

1114

1215
def app():
@@ -24,12 +27,24 @@ def main():
2427
except (ConfigFileNotSpecified, BadConfig) as e:
2528
print(e)
2629
sys.exit(1)
30+
logger = get_pbench_logger(__name__, server_config)
2731
try:
2832
host = str(server_config.get("pbench-server", "bind_host"))
2933
port = str(server_config.get("pbench-server", "bind_port"))
34+
db = str(server_config.get("Postgres", "db_uri"))
3035
workers = str(server_config.get("pbench-server", "workers"))
31-
except (NoOptionError, NoSectionError) as e:
32-
print(f"{__name__}: ERROR: {e.__traceback__}")
36+
37+
# Multiple gunicorn workers will attempt to connect to the DB; rather
38+
# than attempt to synchronize them, detect a missing DB (from the
39+
# postgres URI) and create it here. It's safer to do this here,
40+
# where we're single-threaded.
41+
if not database_exists(db):
42+
logger.info("Postgres DB {} doesn't exist", db)
43+
create_database(db)
44+
logger.info("Created DB {}", db)
45+
Database.init_db(server_config, logger)
46+
except (NoOptionError, NoSectionError):
47+
logger.exception(f"{__name__}: ERROR")
3348
sys.exit(1)
3449

3550
subprocess.run(

lib/pbench/server/api/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from pbench.server.database.database import Database
2222
from pbench.server.api.resources.query_apis.query_month_indices import QueryMonthIndices
2323
from pbench.server.api.auth import Auth
24-
2524
from pbench.server.api.resources.users_api import (
2625
RegisterUser,
2726
Login,

lib/pbench/server/api/resources/__init__.py

Whitespace-only changes.

lib/pbench/server/api/resources/upload_api.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from flask import request, jsonify
88
from werkzeug.utils import secure_filename
99
from pbench.server.utils import filesize_bytes
10+
from pbench.server.db.models.tracker import Dataset, States
11+
1012

1113
ALLOWED_EXTENSIONS = {"xz"}
1214

@@ -109,6 +111,12 @@ def put(self, controller):
109111
md5_full_path = Path(path, f"{filename}.md5")
110112
bytes_received = 0
111113

114+
# TODO: Need real user from PUT!
115+
116+
# Create a tracking dataset object; it'll begin in UPLOADING state
117+
dataset = Dataset.create(controller=controller, path=tar_full_path, md5=md5sum)
118+
self.logger.info("Uploading {}", dataset)
119+
112120
with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
113121
chunk_size = 4096
114122
self.logger.debug("Writing chunks")
@@ -181,6 +189,11 @@ def put(self, controller):
181189
)
182190
raise
183191

192+
if dataset:
193+
try:
194+
dataset.advance(States.UPLOADED)
195+
except Exception:
196+
self.logger.exception("Unable to finalize {}", dataset)
184197
response = jsonify(dict(message="File successfully uploaded"))
185198
response.status_code = 201
186199
return response

lib/pbench/server/db/__init__.py

Whitespace-only changes.

lib/pbench/server/db/database.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import sys
2+
from sqlalchemy import create_engine
3+
from sqlalchemy.orm import scoped_session, sessionmaker
4+
from sqlalchemy.ext.declarative import declarative_base
5+
from pbench.server import NoOptionError, NoSectionError
6+
7+
8+
class Database:
    """Database - a base class for declaring SQLAlchemy database model
    classes.

    The class carries two shared, class-level objects: ``Base``, the
    declarative base that model classes derive from, and ``db_session``,
    a scoped session that is bound to an engine by ``init_db``.
    """

    Base = declarative_base()

    # Initialize the db scoped session; it is left unbound here and is
    # attached to an engine later by init_db().
    db_session = scoped_session(sessionmaker(autocommit=False, autoflush=False))

    @staticmethod
    def get_engine_uri(config, logger):
        """Return the database URI from the [Postgres] db_uri setting.

        Args:
            config: configuration object providing ``get(section, option)``
            logger: logger used to report a missing setting; may be falsy,
                in which case nothing is logged

        Returns:
            The value of the ``db_uri`` option in the ``Postgres`` section.

        Raises:
            SystemExit: (status 1) if the section or option is missing.
        """
        try:
            return config.get("Postgres", "db_uri")
        except (NoSectionError, NoOptionError):
            if logger:
                logger.error("Failed to find [Postgres] db_uri in configuration file.")
            sys.exit(1)

    @staticmethod
    def init_engine(server_config, logger):
        """Create a SQLAlchemy engine for the configured database URI.

        Args:
            server_config: configuration object passed to get_engine_uri
            logger: logger used to report a failure; may be falsy

        Returns:
            The engine returned by ``create_engine``.

        Raises:
            SystemExit: (status 1) if the URI lookup or the engine creation
                raises an exception.
        """
        # Bind the name before the try block so that the handler below can
        # always report it, even when the URI lookup itself is what failed.
        uri = None
        try:
            uri = Database.get_engine_uri(server_config, logger)
            return create_engine(uri)
        except Exception:
            if logger:
                logger.exception(
                    "Exception while creating a sqlalchemy engine for {}", uri
                )
            sys.exit(1)

    @staticmethod
    def init_db(server_config, logger):
        """Create the tables for all registered models and bind the session.

        Args:
            server_config: configuration object passed to init_engine
            logger: logger attached to ``Base`` and used to report a
                failure; may be falsy

        Raises:
            SystemExit: (status 1) if any initialization step raises an
                exception. A SystemExit raised by init_engine is not an
                ``Exception`` and so propagates without being logged again.
        """
        # Attach the logger to the base class for models to find; an
        # already-attached logger is left in place.
        if not hasattr(Database.Base, "logger"):
            Database.Base.logger = logger

        # IMPORTANT:
        # Make sure all the models are imported before this function is called
        # so that they will be registered properly on the metadata. Otherwise
        # metadata will not have any tables and create_all functionality will
        # do nothing.
        try:
            Database.Base.query = Database.db_session.query_property()
            engine = Database.init_engine(server_config, logger)
            Database.Base.metadata.create_all(bind=engine)
            Database.db_session.configure(bind=engine)
        except Exception:
            if logger:
                logger.exception("Exception while initializing sqlalchemy database")
            sys.exit(1)

lib/pbench/server/db/models/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)