1111 Enum ,
1212 ForeignKey ,
1313 UniqueConstraint ,
14+ event ,
1415)
15- from sqlalchemy .orm import relationship
16+ from sqlalchemy .orm import relationship , validates
1617from sqlalchemy .exc import SQLAlchemyError
1718
1819from pbench .server .db .database import Database
@@ -302,7 +303,7 @@ class Dataset(Database.Base):
302303 ANTI_USER = "public" # The "user" that owns unowned datasets
303304
304305 id = Column (Integer , primary_key = True , autoincrement = True )
305- user = Column (String (255 ), unique = False , nullable = False )
306+ user = Column (String (255 ), unique = False , nullable = False , default = ANTI_USER )
306307 controller = Column (String (255 ), unique = False , nullable = False )
307308 name = Column (String (255 ), unique = False , nullable = False )
308309
@@ -315,9 +316,9 @@ class Dataset(Database.Base):
315316 # This could be improved when we drop `pbench-server-prep-shim-002`
316317 # as server `PUT` does not have the same problem.
317318 md5 = Column (String (255 ), unique = False , nullable = True )
318- created = Column (DateTime , nullable = False )
319- state = Column (Enum (States ), unique = False , nullable = False )
320- transition = Column (DateTime , nullable = False )
319+ created = Column (DateTime , nullable = False , default = datetime . datetime . now () )
320+ state = Column (Enum (States ), unique = False , nullable = False , default = States . UPLOADING )
321+ transition = Column (DateTime , nullable = False , default = datetime . datetime . now () )
321322
322323 # NOTE: this relationship defines a `dataset` property in `Metadata`
323324 # that refers to the parent `Dataset` object.
@@ -334,53 +335,23 @@ class Dataset(Database.Base):
334335 # entirely by database and messages, we can improve this.
335336 __table_args__ = (UniqueConstraint ("controller" , "name" ), {})
336337
337- def __init__ (self , ** kwargs ):
338+ @validates ("state" )
339+ def validate_state (self , key , value ):
340+ """Validate that the value provided for the Dataset state is a member
341+ of the States ENUM before it's applied by the SQLAlchemy constructor.
338342 """
339- Construct a new Dataset object; generally this only makes sense when
340- taking in a new dataset via PUT, and the state of the dataset defaults
341- to UPLOADING.
342-
343- The dataset may be specified by controller and name, or by tarball
344- file path. If path is specified and either controller or name are
345- omitted, they'll be filled in from path. (If all three are specified,
346- path is ignored.)
347-
348- NOTE: until we can get rid of the old `gold` unit tests, we need to
349- be able to create a Dataset in an arbitrary state, so that's allowed;
350- however, deployed server code should never do this!
351-
352- Parameters (all kwargs):
353-
354- user User associated with the dataset
355- controller Controller node
356- name Name of dataset
357- path Dataset path name (see _render_path method)
358- md5 The dataset MD5 hash
359- created Date the dataset was PUT to server [now]
360- state The current state of the dataset [UPLOADING]
361- """
362- Dataset ._render_path (kwargs )
363- if "state" in kwargs :
364- if type (kwargs .get ("state" )) is not States :
365- raise DatasetBadParameterType (kwargs .get ("state" ), States )
366- super ().__init__ (** kwargs )
367- self .user = kwargs .get ("user" , Dataset .ANTI_USER )
368- self .controller = kwargs .get ("controller" )
369- self .name = kwargs .get ("name" )
370- time = datetime .datetime .now ()
371- self .created = time
372- self .md5 = kwargs .get ("md5" )
373- self .state = kwargs .get ("state" , States .UPLOADING )
374- self .transition = time
343+ if type (value ) is not States :
344+ raise DatasetBadParameterType (value , States )
345+ return value
375346
376347 @staticmethod
377- def _render_path (kwargs ):
348+ def _render_path (patharg = None , controllerarg = None , namearg = None ):
378349 """
379- _render_path Process a `path` kwarg and convert it into `controller`
380- and/or `name` kwargs, modifying the kwarg dict directly .
350+ _render_path Process a `path` string and convert it into `controller`
351+ and/or `name` strings .
381352
382- This pre-processes the kwargs dict before it can be presented to a
383- query or constructor. If the calling context has only the full file
353+ This pre-processes the controller and name before they are presented to
354+ a query or constructor. If the calling context has only the full file
384355 path of a dataset, this can extract both "controller" and "name" from
385356 the path. It can also be used to construct either "controller" or
386357 "name", as it will fill in either or both if not already present in
@@ -393,34 +364,34 @@ def _render_path(kwargs):
393364 possibly fragile assumptions regarding the structure of the symlink
394365 name.
395366
396- IMPLEMENTATION NOTE: After pre-processing, whether used or not, the
397- "path" key will be removed from the dict, as the SQLAlchemy model
398- superclass would complain about the undefined column.
399-
400367 Args:
401- kwargs (dict):
402- "path": A tarball file path from which the controller (host)
403- name, the tarball dataset name (basename minus extension)
404- or both will be derived.
405- "controller": The controller name (hostname) of the dataset;
406- this is retained if specified, or will be constructed
407- from "path" if not present.
408- "name": The dataset name (file path basename minus ".tar.xz");
409- this is retained if specified, or will be constructed from
410- "path" if not present
368+ patharg: A tarball file path from which the controller (host)
369+ name, the tarball dataset name (basename minus extension),
370+ or both will be derived.
371+ controllerarg: The controller name (hostname) of the dataset;
372+ this is retained if specified, or will be constructed
373+ from "path" if not present.
374+ namearg: The dataset name (file path basename minus ".tar.xz");
375+ this is retained if specified, or will be constructed from
376+ "path" if not present
377+
378+ Returns:
379+ A tuple of (controller, name) based on the three arguments
411380 """
412- if "path" in kwargs :
413- path = Path (kwargs .get ("path" ))
381+ controller_result = controllerarg
382+ name_result = namearg
383+
384+ if patharg :
385+ path = Path (patharg )
414386 if path .is_symlink ():
415387 path = Path (os .path .realpath (path ))
416- if "name" not in kwargs :
417- name = path .name
418- if name .endswith (".tar.xz" ):
419- name = name [:- 7 ]
420- kwargs ["name" ] = name
421- if "controller" not in kwargs :
422- kwargs ["controller" ] = path .parent .name
423- del kwargs ["path" ] # Don't let the parental unit see this
388+ if not name_result :
389+ name_result = path .name
390+ if name_result .endswith (".tar.xz" ):
391+ name_result = name_result [:- 7 ]
392+ if not controller_result :
393+ controller_result = path .parent .name
394+ return controller_result , name_result
424395
425396 @staticmethod
426397 def create (** kwargs ):
@@ -432,7 +403,7 @@ def create(**kwargs):
432403 kwargs (dict):
433404 "user": The owner of the dataset; defaults to "public".
434405 "path": A tarball file path from which the controller (host)
435- name, the tarball dataset name (basename minus extension)
406+ name, the tarball dataset name (basename minus extension),
436407 or both will be derived.
437408 "controller": The controller name (hostname) of the dataset;
438409 this is retained if specified, or will be constructed
@@ -458,27 +429,26 @@ def create(**kwargs):
458429 return dataset
459430
460431 @staticmethod
461- def attach (** kwargs ):
432+ def attach (path = None , controller = None , name = None , state = None ):
462433 """
463434 attach Attempt to fetch dataset for the controller and dataset name,
464- or using a specified file path (see __init__ and _render_path).
435+ or using a specified file path (see _render_path and the path_init
436+ event listener for details).
465437
466- If the `state` kwarg is specified, the dataset will be created in that
467- state or, if it already exists, `attach` will attempt to advance the
468- dataset to that state.
438+ If state is specified, attach will attempt to advance the dataset to
439+ that state.
469440
470441 Args:
471- kwargs (dict):
472- "path": A tarball file path from which the controller (host)
473- name, the tarball dataset name (basename minus extension)
474- or both will be derived.
475- "controller": The controller name (hostname) of the dataset;
476- this is retained if specified, or will be constructed
477- from "path" if not present.
478- "name": The dataset name (file path basename minus ".tar.xz");
479- this is retained if specified, or will be constructed from
480- "path" if not present.
481- "state": The desired state to advance the dataset.
442+ "path": A tarball file path from which the controller (host)
443+ name, the tarball dataset name (basename minus extension),
444+ or both will be derived.
445+ "controller": The controller name (hostname) of the dataset;
446+ this is retained if specified, or will be constructed
447+ from "path" if not present.
448+ "name": The dataset name (file path basename minus ".tar.xz");
449+ this is retained if specified, or will be constructed from
450+ "path" if not present.
451+ "state": The desired state to advance the dataset.
482452
483453 Raises:
484454 DatasetSqlError: problem interacting with Database
@@ -493,9 +463,7 @@ def attach(**kwargs):
493463 Dataset: a dataset object in the desired state (if specified)
494464 """
495465 # Make sure we have controller and name from path
496- Dataset ._render_path (kwargs )
497- controller = kwargs .get ("controller" )
498- name = kwargs .get ("name" )
466+ controller , name = Dataset ._render_path (path , controller , name )
499467 try :
500468 dataset = (
501469 Database .db_session .query (Dataset )
@@ -507,12 +475,12 @@ def attach(**kwargs):
507475 "Error attaching {}>{}: {}" , controller , name , str (e )
508476 )
509477 raise DatasetSqlError ("attaching" , controller , name ) from e
510- else :
511- if dataset is None :
512- Dataset .logger .warning ("{}>{} not found" , controller , name )
513- raise DatasetNotFound (controller , name )
514- elif " state" in kwargs :
515- dataset .advance (kwargs . get ( " state" ) )
478+
479+ if dataset is None :
480+ Dataset .logger .warning ("{}>{} not found" , controller , name )
481+ raise DatasetNotFound (controller , name )
482+ elif state :
483+ dataset .advance (state )
516484 return dataset
517485
518486 def __str__ (self ):
@@ -583,6 +551,32 @@ def update(self):
583551 raise DatasetSqlError ("updating" , self .controller , self .name ) from e
584552
585553
554+ @event .listens_for (Dataset , "init" )
555+ def path_init (target , args , kwargs ):
556+ """Listen for an init event on a Dataset to process a path before the
557+ SQLAlchemy constructor sees it.
558+
559+ We want the constructor to see both "controller" and "name" parameters in
560+ the kwargs representing the initial SQL column values. This listener allows
561+ us to provide those values by specifying a file path, which is processed
562+ into controller and name using the internal _render_path() helper.
563+
564+ We will remove "path" from kwargs so that SQLAlchemy doesn't see it. (This
565+ is an explicitly allowed side effect of the listener architecture.)
566+ """
567+ if "path" in kwargs :
568+ controller , name = Dataset ._render_path (
569+ patharg = kwargs .get ("path" ),
570+ controllerarg = kwargs .get ("controller" ),
571+ namearg = kwargs .get ("name" ),
572+ )
573+ if "controller" not in kwargs :
574+ kwargs ["controller" ] = controller
575+ if "name" not in kwargs :
576+ kwargs ["name" ] = name
577+ del kwargs ["path" ]
578+
579+
586580class Metadata (Database .Base ):
587581 """ Retain secondary information about datasets
588582
@@ -609,26 +603,14 @@ class Metadata(Database.Base):
609603 Integer , ForeignKey ("dataset.id" , ondelete = "CASCADE" ), nullable = False
610604 )
611605
612- def __init__ (self , ** kwargs ):
606+ @validates ("key" )
607+ def validate_key (self , key , value ):
608+ """Validate that the value provided for the Metadata key argument is an
609+ allowed name.
613610 """
614- Construct a new Metadata object for a dataset
615-
616- Parameters:
617-
618- key Metadata key (from METADATA_KEYS)
619- value String value for key
620- """
621- if "key" not in kwargs :
622- raise MetadataMissingParameter ("key" )
623- key = kwargs .get ("key" )
624- if key not in Metadata .METADATA_KEYS :
625- raise MetadataBadKey (key )
626- if "value" not in kwargs :
627- raise MetadataMissingKeyValue (key )
628- value = kwargs .get ("value" )
629- super ().__init__ (** kwargs )
630- self .key = key
631- self .value = value
611+ if value not in Metadata .METADATA_KEYS :
612+ raise MetadataBadKey (value )
613+ return value
632614
633615 @staticmethod
634616 def create (** kwargs ):
@@ -735,3 +717,18 @@ def delete(self):
735717 Metadata .logger .exception ("Can't delete {} from DB" , self )
736718 Database .db_session .rollback ()
737719 raise MetadataSqlError ("deleting" , self .dataset , self .key ) from e
720+
721+
722+ @event .listens_for (Metadata , "init" )
723+ def check_required (target , args , kwargs ):
724+ """Listen for an init event on Metadata to verify parameters before the
725+ SQLAlchemy constructor gets control.
726+
727+ We want the constructor to see both "key" and "value" parameters; if either
728+ isn't present, give a simpler and more useful error message rather than the
729+ internal SQL constraint failures that would result.
730+ """
731+ if "key" not in kwargs :
732+ raise MetadataMissingParameter ("key" )
733+ if "value" not in kwargs :
734+ raise MetadataMissingKeyValue (kwargs .get ("key" ))
0 commit comments