11import copy
22import datetime
33import enum
4- import os
54from pathlib import Path
65import re
7- from typing import Any , List , Optional , Tuple , Union
6+ from typing import Any , List , Optional , Union
87
98from sqlalchemy import Column , DateTime , Enum , event , ForeignKey , Integer , JSON , String
109from sqlalchemy .exc import IntegrityError , SQLAlchemyError
@@ -24,13 +23,25 @@ class DatasetError(Exception):
2423 pass
2524
2625
26+ class DatasetBadName (DatasetError ):
27+ """
28+ Specified filename does not follow Pbench tarball naming rules.
29+ """
30+
31+ def __init__ (self , name : Path ):
32+ self .name : str = str (name )
33+
34+ def __str__ (self ) -> str :
35+ return f"File name { self .name !r} does not end in { Dataset .TARBALL_SUFFIX !r} "
36+
37+
2738class DatasetSqlError (DatasetError ):
2839 """
2940 SQLAlchemy errors reported through Dataset operations.
3041
31- The exception will identify the controller and name of the target dataset,
32- along with the operation being attempted; the __cause__ will specify the
33- original SQLAlchemy exception.
42+ The exception will identify the name of the target dataset, along with the
43+ operation being attempted; the __cause__ will specify the original
44+ SQLAlchemy exception.
3445 """
3546
3647 def __init__ (self , operation : str , ** kwargs ):
@@ -46,12 +57,11 @@ class DatasetDuplicate(DatasetError):
4657 Attempt to create a Dataset that already exists.
4758 """
4859
49- def __init__ (self , controller : str , name : str ):
50- self .controller = controller
60+ def __init__ (self , name : str ):
5161 self .name = name
5262
5363 def __str__ (self ):
54- return f"Duplicate dataset { self .controller } | { self . name } "
64+ return f"Duplicate dataset { self .name !r } "
5565
5666
5767class DatasetNotFound (DatasetError ):
@@ -98,8 +108,8 @@ class DatasetTerminalStateViolation(DatasetTransitionError):
98108 An attempt was made to change the state of a dataset currently in a
99109 terminal state.
100110
101- The error text will identify the dataset by controller and name, and both
102- the current and requested new states.
111+ The error text will identify the dataset by name, and both the current and
112+ requested new states.
103113 """
104114
105115 def __init__ (self , dataset : "Dataset" , requested_state : "States" ):
@@ -115,8 +125,8 @@ class DatasetBadStateTransition(DatasetTransitionError):
115125 An attempt was made to advance a dataset to a new state that's not
116126 reachable from the current state.
117127
118- The error text will identify the dataset by controller and name, and both
119- the current and requested new states.
128+ The error text will identify the dataset by name, and both the current and
129+ requested new states.
120130 """
121131
122132 def __init__ (self , dataset : "Dataset" , requested_state : "States" ):
@@ -364,7 +374,6 @@ class Dataset(Database.Base):
364374 id Generated unique ID of table row
365375 owner Owning username of the dataset
366376 access Dataset is "private" to owner, or "public"
367- controller Name of controller node
368377 name Base name of dataset (tarball)
369378 md5 The dataset MD5 hash (Elasticsearch ID)
370379 created Tarball metadata timestamp (set during PUT)
@@ -415,9 +424,6 @@ class Dataset(Database.Base):
415424 # Access policy for Dataset (public or private)
416425 access = Column (String (255 ), unique = False , nullable = False , default = "private" )
417426
418- # Host name of the system that collected the data
419- controller = Column (String (255 ), unique = False , nullable = False )
420-
421427 # FIXME:
422428 # Ideally, `md5` would not be `nullable`, but allowing it means that
423429 # pbench-server-prep-shim-002 utility can construct a Dataset object
@@ -467,6 +473,31 @@ def is_tarball(path: Union[Path, str]) -> bool:
467473 """
468474 return str (path ).endswith (Dataset .TARBALL_SUFFIX )
469475
476+ @staticmethod
477+ def stem (path : Union [str , Path ]) -> str :
478+ """
479+ The Path.stem() removes a single suffix, so our standard "a.tar.xz"
480+ returns "a.tar" instead of "a". We could double-stem, but instead
481+ this just checks for the expected 7 character suffix and strips it.
482+
483+ If the path does not end in ".tar.xz" then the full path.name is
484+ returned.
485+
486+ Args:
487+ path: A file path that might be a Pbench tarball
488+
489+ Raises:
490+ BadFilename: the path name does not end in TARBALL_SUFFIX
491+
492+ Returns:
493+ The stripped "stem" of the dataset
494+ """
495+ p = Path (path )
496+ if __class__ .is_tarball (p ):
497+ return p .name [: - len (Dataset .TARBALL_SUFFIX )]
498+ else :
499+ raise DatasetBadName (p )
500+
470501 @validates ("state" )
471502 def validate_state (self , key : str , value : Any ) -> States :
472503 """
@@ -532,55 +563,6 @@ def validate_access(self, key: str, value: str) -> str:
532563 return access
533564 raise DatasetBadParameterType (value , "access keyword" )
534565
535- @staticmethod
536- def _render_path (patharg = None , controllerarg = None , namearg = None ) -> Tuple [str , str ]:
537- """
538- Process a `path` string and convert it into `controller` and/or `name`
539- strings.
540-
541- This pre-processes the controller and name before they are presented to
542- a query or constructor. If the calling context has only the full file
543- path of a dataset, this can extract both "controller" and "name" from
544- the path. It can also be used to construct either "controller" or
545- "name", as it will fill in either or both if not already present in
546- the dict.
547-
548- If the path is a symlink (e.g., a "TO-BACKUP" or other Pbench command
549- link, most likely when a dataset is quarantined due to some consistency
550- check failure), this code will follow the link in order to construct a
551- controller name from the original path without needing to make any
552- possibly fragile assumptions regarding the structure of the symlink
553- name.
554-
555- Args:
556- patharg: A tarball file path from which the controller (host)
557- name, the tarball dataset name (basename minus extension),
558- or both will be derived.
559- controllerarg: The controller name (hostname) of the dataset;
560- this is retained if specified, or will be constructed
561- from "path" if not present.
562- namearg: The dataset name (file path stem);
563- this is retained if specified, or will be constructed from
564- "path" if not present
565-
566- Returns:
567- A tuple of (controller, name) based on the three arguments
568- """
569- controller_result = controllerarg
570- name_result = namearg
571-
572- if patharg :
573- path = Path (patharg )
574- if path .is_symlink ():
575- path = Path (os .path .realpath (path ))
576- if not name_result :
577- name_result = path .name
578- if Dataset .is_tarball (name_result ):
579- name_result = name_result [: - len (Dataset .TARBALL_SUFFIX )]
580- if not controller_result :
581- controller_result = path .parent .name
582- return controller_result , name_result
583-
584566 @staticmethod
585567 def create (** kwargs ) -> "Dataset" :
586568 """
@@ -590,15 +572,7 @@ def create(**kwargs) -> "Dataset":
590572 Args:
591573 kwargs (dict):
592574 "owner": The owner of the dataset; defaults to None.
593- "path": A tarball file path from which the controller (host)
594- name, the tarball dataset name (basename minus extension),
595- or both will be derived.
596- "controller": The controller name (hostname) of the dataset;
597- this is retained if specified, or will be constructed
598- from "path" if not present.
599- "name": The dataset name (file path stem);
600- this is retained if specified, or will be constructed from
601- "path" if not present.
575+ "name": The dataset name (file path stem).
602576 "state": The initial state of the new dataset.
603577
604578 Returns:
@@ -608,18 +582,14 @@ def create(**kwargs) -> "Dataset":
608582 dataset = Dataset (** kwargs )
609583 dataset .add ()
610584 except Exception :
611- Dataset .logger .exception (
612- "Failed create: {}|{}" , kwargs .get ("controller" ), kwargs .get ("name" )
613- )
585+ Dataset .logger .exception ("Failed create: {}" , kwargs .get ("name" ))
614586 raise
615587 return dataset
616588
617589 @staticmethod
618- def attach (path = None , name = None , state = None ) -> "Dataset" :
590+ def attach (name = None , state = None ) -> "Dataset" :
619591 """
620- Attempt to find dataset for the specified dataset name, or using a
621- specified file path (see _render_path and the path_init event listener
622- for details).
592+ Attempt to find dataset for the specified dataset name.
623593
624594 If state is specified, attach will attempt to advance the dataset to
625595 that state.
@@ -628,12 +598,7 @@ def attach(path=None, name=None, state=None) -> "Dataset":
628598 using a full path, use Dataset.query instead.
629599
630600 Args:
631- "path": A tarball file path from which the controller (host)
632- name, the tarball dataset name (basename minus extension),
633- or both will be derived.
634- "name": The dataset name (file path stem);
635- this is retained if specified, or will be constructed from
636- "path" if not present.
601+ "name": The dataset name (file path stem).
637602 "state": The desired state to advance the dataset.
638603
639604 Raises:
@@ -649,12 +614,11 @@ def attach(path=None, name=None, state=None) -> "Dataset":
649614 Dataset: a dataset object in the desired state (if specified)
650615 """
651616 # Make sure we have a name if only path was specified
652- _ , name = Dataset ._render_path (patharg = path , namearg = name )
653617 dataset = Dataset .query (name = name )
654618
655619 if dataset is None :
656620 Dataset .logger .warning ("Dataset {} not found" , name )
657- raise DatasetNotFound (path = path , name = name )
621+ raise DatasetNotFound (name = name )
658622 elif state :
659623 dataset .advance (state )
660624 return dataset
@@ -682,7 +646,7 @@ def __str__(self) -> str:
682646 Returns:
683647 string: Representation of the dataset
684648 """
685- return f"{ self .owner .username } ({ self .owner_id } )|{ self .controller } | { self . name } "
649+ return f"{ self .owner .username } ({ self .owner_id } )|{ self .name } "
686650
687651 def advance (self , new_state : States ):
688652 """
@@ -726,15 +690,13 @@ def add(self):
726690 Database .db_session .add (self )
727691 Database .db_session .commit ()
728692 except IntegrityError as e :
729- Dataset .logger .warning (
730- "Duplicate dataset {}|{}: {}" , self .controller , self .name , e
731- )
693+ Dataset .logger .warning ("Duplicate dataset {}: {}" , self .name , e )
732694 Database .db_session .rollback ()
733- raise DatasetDuplicate (self .controller , self . name ) from None
695+ raise DatasetDuplicate (self .name ) from None
734696 except Exception :
735697 self .logger .exception ("Can't add {} to DB" , str (self ))
736698 Database .db_session .rollback ()
737- raise DatasetSqlError ("adding" , controller = self . controller , name = self .name )
699+ raise DatasetSqlError ("adding" , name = self .name )
738700
739701 def update (self ):
740702 """
@@ -746,9 +708,7 @@ def update(self):
746708 except Exception :
747709 self .logger .error ("Can't update {} in DB" , str (self ))
748710 Database .db_session .rollback ()
749- raise DatasetSqlError (
750- "updating" , controller = self .controller , name = self .name
751- )
711+ raise DatasetSqlError ("updating" , name = self .name )
752712
753713 def delete (self ):
754714 """
@@ -762,33 +722,6 @@ def delete(self):
762722 raise
763723
764724
765- @event .listens_for (Dataset , "init" )
766- def path_init (target , args , kwargs ):
767- """
768- Listen for an init event on a Dataset to process a path before the
769- SQLAlchemy constructor sees it.
770-
771- We want the constructor to see both "controller" and "name" parameters in
772- the kwargs representing the initial SQL column values. This listener allows
773- us to provide those values by specifying a file path, which is processed
774- into controller and name using the internal _render_path() helper.
775-
776- We will remove "path" from kwargs so that SQLAlchemy doesn't see it. (This
777- is an explicitly allowed side effect of the listener architecture.)
778- """
779- if "path" in kwargs :
780- controller , name = Dataset ._render_path (
781- patharg = kwargs .get ("path" ),
782- controllerarg = kwargs .get ("controller" ),
783- namearg = kwargs .get ("name" ),
784- )
785- if "controller" not in kwargs :
786- kwargs ["controller" ] = controller
787- if "name" not in kwargs :
788- kwargs ["name" ] = name
789- del kwargs ["path" ]
790-
791-
792725class Metadata (Database .Base ):
793726 """
794727 Retain secondary information about datasets
0 commit comments