Skip to content

Commit a37f67d

Browse files
committed
Remove __init__ for Model classes
Replace path preprocessing and column existence/type checks with SQLAlchemy event listeners and validator helpers.
1 parent f40f6be commit a37f67d

File tree

2 files changed

+139
-123
lines changed

2 files changed

+139
-123
lines changed

lib/pbench/server/db/models/tracker.py

Lines changed: 113 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
Enum,
1212
ForeignKey,
1313
UniqueConstraint,
14+
event,
1415
)
15-
from sqlalchemy.orm import relationship
16+
from sqlalchemy.orm import relationship, validates
1617
from sqlalchemy.exc import SQLAlchemyError
1718

1819
from pbench.server.db.database import Database
@@ -302,7 +303,7 @@ class Dataset(Database.Base):
302303
ANTI_USER = "public" # The "user" that owns unowned datasets
303304

304305
id = Column(Integer, primary_key=True, autoincrement=True)
305-
user = Column(String(255), unique=False, nullable=False)
306+
user = Column(String(255), unique=False, nullable=False, default=ANTI_USER)
306307
controller = Column(String(255), unique=False, nullable=False)
307308
name = Column(String(255), unique=False, nullable=False)
308309

@@ -315,9 +316,9 @@ class Dataset(Database.Base):
315316
# This could be improved when we drop `pbench-server-prep-shim-002`
316317
# as server `PUT` does not have the same problem.
317318
md5 = Column(String(255), unique=False, nullable=True)
318-
created = Column(DateTime, nullable=False)
319-
state = Column(Enum(States), unique=False, nullable=False)
320-
transition = Column(DateTime, nullable=False)
319+
created = Column(DateTime, nullable=False, default=datetime.datetime.now())
320+
state = Column(Enum(States), unique=False, nullable=False, default=States.UPLOADING)
321+
transition = Column(DateTime, nullable=False, default=datetime.datetime.now())
321322

322323
# NOTE: this relationship defines a `dataset` property in `Metadata`
323324
# that refers to the parent `Dataset` object.
@@ -334,53 +335,23 @@ class Dataset(Database.Base):
334335
# entirely by database and messages, we can improve this.
335336
__table_args__ = (UniqueConstraint("controller", "name"), {})
336337

337-
def __init__(self, **kwargs):
338+
@validates("state")
339+
def validate_state(self, key, value):
340+
"""Validate that the value provided for the Dataset state is a member
341+
of the States ENUM before it's applied by the SQLAlchemy constructor.
338342
"""
339-
Construct a new Dataset object; generally this only makes sense when
340-
taking in a new dataset via PUT, and the state of the dataset defaults
341-
to UPLOADING.
342-
343-
The dataset may be specified by controller and name, or by tarball
344-
file path. If path is specified and either controller or name are
345-
omitted, they'll be filled in from path. (If all three are specified,
346-
path is ignored.)
347-
348-
NOTE: until we can get rid of the old `gold` unit tests, we need to
349-
be able to create a Dataset in an arbitrary state, so that's allowed;
350-
however, deployed server code should never do this!
351-
352-
Parameters (all kwargs):
353-
354-
user User associated with the dataset
355-
controller Controller node
356-
name Name of dataset
357-
path Dataset path name (see _render_path method)
358-
md5 The dataset MD5 hash
359-
created Date the dataset was PUT to server [now]
360-
state The current state of the dataset [UPLOADING]
361-
"""
362-
Dataset._render_path(kwargs)
363-
if "state" in kwargs:
364-
if type(kwargs.get("state")) is not States:
365-
raise DatasetBadParameterType(kwargs.get("state"), States)
366-
super().__init__(**kwargs)
367-
self.user = kwargs.get("user", Dataset.ANTI_USER)
368-
self.controller = kwargs.get("controller")
369-
self.name = kwargs.get("name")
370-
time = datetime.datetime.now()
371-
self.created = time
372-
self.md5 = kwargs.get("md5")
373-
self.state = kwargs.get("state", States.UPLOADING)
374-
self.transition = time
343+
if type(value) is not States:
344+
raise DatasetBadParameterType(value, States)
345+
return value
375346

376347
@staticmethod
377-
def _render_path(kwargs):
348+
def _render_path(patharg=None, controllerarg=None, namearg=None):
378349
"""
379-
_render_path Process a `path` kwarg and convert it into `controller`
380-
and/or `name` kwargs, modifying the kwarg dict directly.
350+
_render_path Process a `path` string and convert it into `controller`
351+
and/or `name` strings.
381352
382-
This pre-processes the kwargs dict before it can be presented to a
383-
query or constructor. If the calling context has only the full file
353+
This pre-processes the controller and name before they are presented to
354+
a query or constructor. If the calling context has only the full file
384355
path of a dataset, this can extract both "controller" and "name" from
385356
the path. It can also be used to construct either "controller" or
386357
"name", as it will fill in either or both if not already present in
@@ -393,34 +364,34 @@ def _render_path(kwargs):
393364
possibly fragile assumptions regarding the structure of the symlink
394365
name.
395366
396-
IMPLEMENTATION NOTE: After pre-processing, whether used or not, the
397-
"path" key will be removed from the dict, as the SQLAlchemy model
398-
superclass would complain about the undefined column.
399-
400367
Args:
401-
kwargs (dict):
402-
"path": A tarball file path from which the controller (host)
403-
name, the tarball dataset name (basename minus extension)
404-
or both will be derived.
405-
"controller": The controller name (hostname) of the dataset;
406-
this is retained if specified, or will be constructed
407-
from "path" if not present.
408-
"name": The dataset name (file path basename minus ".tar.xz");
409-
this is retained if specified, or will be constructed from
410-
"path" if not present
368+
patharg: A tarball file path from which the controller (host)
369+
name, the tarball dataset name (basename minus extension),
370+
or both will be derived.
371+
controllerarg: The controller name (hostname) of the dataset;
372+
this is retained if specified, or will be constructed
373+
from "path" if not present.
374+
namearg: The dataset name (file path basename minus ".tar.xz");
375+
this is retained if specified, or will be constructed from
376+
"path" if not present
377+
378+
Returns:
379+
A tuple of (controller, name) based on the three arguments
411380
"""
412-
if "path" in kwargs:
413-
path = Path(kwargs.get("path"))
381+
controller_result = controllerarg
382+
name_result = namearg
383+
384+
if patharg:
385+
path = Path(patharg)
414386
if path.is_symlink():
415387
path = Path(os.path.realpath(path))
416-
if "name" not in kwargs:
417-
name = path.name
418-
if name.endswith(".tar.xz"):
419-
name = name[:-7]
420-
kwargs["name"] = name
421-
if "controller" not in kwargs:
422-
kwargs["controller"] = path.parent.name
423-
del kwargs["path"] # Don't let the parental unit see this
388+
if not name_result:
389+
name_result = path.name
390+
if name_result.endswith(".tar.xz"):
391+
name_result = name_result[:-7]
392+
if not controller_result:
393+
controller_result = path.parent.name
394+
return controller_result, name_result
424395

425396
@staticmethod
426397
def create(**kwargs):
@@ -432,7 +403,7 @@ def create(**kwargs):
432403
kwargs (dict):
433404
"user": The owner of the dataset; defaults to "public".
434405
"path": A tarball file path from which the controller (host)
435-
name, the tarball dataset name (basename minus extension)
406+
name, the tarball dataset name (basename minus extension),
436407
or both will be derived.
437408
"controller": The controller name (hostname) of the dataset;
438409
this is retained if specified, or will be constructed
@@ -458,27 +429,26 @@ def create(**kwargs):
458429
return dataset
459430

460431
@staticmethod
461-
def attach(**kwargs):
432+
def attach(path=None, controller=None, name=None, state=None):
462433
"""
463434
attach Attempt to fetch dataset for the controller and dataset name,
464-
or using a specified file path (see __init__ and _render_path).
435+
or using a specified file path (see _render_path and the path_init
436+
event listener for details).
465437
466-
If the `state` kwarg is specified, the dataset will be created in that
467-
state or, if it already exists, `attach` will attempt to advance the
468-
dataset to that state.
438+
If state is specified, attach will attempt to advance the dataset to
439+
that state.
469440
470441
Args:
471-
kwargs (dict):
472-
"path": A tarball file path from which the controller (host)
473-
name, the tarball dataset name (basename minus extension)
474-
or both will be derived.
475-
"controller": The controller name (hostname) of the dataset;
476-
this is retained if specified, or will be constructed
477-
from "path" if not present.
478-
"name": The dataset name (file path basename minus ".tar.xz");
479-
this is retained if specified, or will be constructed from
480-
"path" if not present.
481-
"state": The desired state to advance the dataset.
442+
"path": A tarball file path from which the controller (host)
443+
name, the tarball dataset name (basename minus extension),
444+
or both will be derived.
445+
"controller": The controller name (hostname) of the dataset;
446+
this is retained if specified, or will be constructed
447+
from "path" if not present.
448+
"name": The dataset name (file path basename minus ".tar.xz");
449+
this is retained if specified, or will be constructed from
450+
"path" if not present.
451+
"state": The desired state to advance the dataset.
482452
483453
Raises:
484454
DatasetSqlError: problem interacting with Database
@@ -493,9 +463,7 @@ def attach(**kwargs):
493463
Dataset: a dataset object in the desired state (if specified)
494464
"""
495465
# Make sure we have controller and name from path
496-
Dataset._render_path(kwargs)
497-
controller = kwargs.get("controller")
498-
name = kwargs.get("name")
466+
controller, name = Dataset._render_path(path, controller, name)
499467
try:
500468
dataset = (
501469
Database.db_session.query(Dataset)
@@ -507,12 +475,12 @@ def attach(**kwargs):
507475
"Error attaching {}>{}: {}", controller, name, str(e)
508476
)
509477
raise DatasetSqlError("attaching", controller, name) from e
510-
else:
511-
if dataset is None:
512-
Dataset.logger.warning("{}>{} not found", controller, name)
513-
raise DatasetNotFound(controller, name)
514-
elif "state" in kwargs:
515-
dataset.advance(kwargs.get("state"))
478+
479+
if dataset is None:
480+
Dataset.logger.warning("{}>{} not found", controller, name)
481+
raise DatasetNotFound(controller, name)
482+
elif state:
483+
dataset.advance(state)
516484
return dataset
517485

518486
def __str__(self):
@@ -583,6 +551,32 @@ def update(self):
583551
raise DatasetSqlError("updating", self.controller, self.name) from e
584552

585553

554+
@event.listens_for(Dataset, "init")
555+
def path_init(target, args, kwargs):
556+
"""Listen for an init event on a Dataset to process a path before the
557+
SQLAlchemy constructor sees it.
558+
559+
We want the constructor to see both "controller" and "name" parameters in
560+
the kwargs representing the initial SQL column values. This listener allows
561+
us to provide those values by specifying a file path, which is processed
562+
into controller and name using the internal _render_path() helper.
563+
564+
We will remove "path" from kwargs so that SQLAlchemy doesn't see it. (This
565+
is an explicitly allowed side effect of the listener architecture.)
566+
"""
567+
if "path" in kwargs:
568+
controller, name = Dataset._render_path(
569+
patharg=kwargs.get("path"),
570+
controllerarg=kwargs.get("controller"),
571+
namearg=kwargs.get("name"),
572+
)
573+
if "controller" not in kwargs:
574+
kwargs["controller"] = controller
575+
if "name" not in kwargs:
576+
kwargs["name"] = name
577+
del kwargs["path"]
578+
579+
586580
class Metadata(Database.Base):
587581
""" Retain secondary information about datasets
588582
@@ -609,26 +603,14 @@ class Metadata(Database.Base):
609603
Integer, ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False
610604
)
611605

612-
def __init__(self, **kwargs):
606+
@validates("key")
607+
def validate_key(self, key, value):
608+
"""Validate that the value provided for the Metadata key argument is an
609+
allowed name.
613610
"""
614-
Construct a new Metadata object for a dataset
615-
616-
Parameters:
617-
618-
key Metadata key (from METADATA_KEYS)
619-
value String value for key
620-
"""
621-
if "key" not in kwargs:
622-
raise MetadataMissingParameter("key")
623-
key = kwargs.get("key")
624-
if key not in Metadata.METADATA_KEYS:
625-
raise MetadataBadKey(key)
626-
if "value" not in kwargs:
627-
raise MetadataMissingKeyValue(key)
628-
value = kwargs.get("value")
629-
super().__init__(**kwargs)
630-
self.key = key
631-
self.value = value
611+
if value not in Metadata.METADATA_KEYS:
612+
raise MetadataBadKey(value)
613+
return value
632614

633615
@staticmethod
634616
def create(**kwargs):
@@ -735,3 +717,18 @@ def delete(self):
735717
Metadata.logger.exception("Can't delete {} from DB", self)
736718
Database.db_session.rollback()
737719
raise MetadataSqlError("deleting", self.dataset, self.key) from e
720+
721+
722+
@event.listens_for(Metadata, "init")
723+
def check_required(target, args, kwargs):
724+
"""Listen for an init event on Metadata to verify parameters before the
725+
SQLAlchemy constructor gets control.
726+
727+
We want the constructor to see both "key" and "value" parameters; if either
728+
isn't present, give a simpler and more useful error message rather than the
729+
internal SQL constraint failures that would result.
730+
"""
731+
if "key" not in kwargs:
732+
raise MetadataMissingParameter("key")
733+
if "value" not in kwargs:
734+
raise MetadataMissingKeyValue(kwargs.get("key"))

0 commit comments

Comments
 (0)