3737class Metadata :
3838 """Metadata representation
3939
40+ This class provides functionality for serialization / deserialization of
41+ python child classes to descriptors.
42+
43+ A **descriptor** is a JSON serializable `dict`.
44+ A **profile** is a JSON Schema dict that sets expectations on the format
45+ of the descriptor.
46+
4047 For proper functioning a child class must be decorated by
4148 "@attrs.define(kw_only=True, repr=False)" and ensure that
42- "Metadata.__attrs_post_init__" is called
49+ "Metadata.__attrs_post_init__" is called :
50+
51+ - `kw_only=True` is required because this class will need explicit
52+ keywords to be able to track which properties have been set at
53+ initialization (see implementation of `__new__`, which uses the keyword
54+ arguments `kwargs`)
55+ - `repr=False` prevents `attrs` from overwriting the inherited `__repr__`
56+ function defined in this class.
4357
4458 """
4559
4660 custom : dict [str , Any ] = {}
4761 """
48- List of custom parameters. Any extra properties will be added
62+ List of custom parameters. Any extra property will be added
4963 to the custom property.
5064 """
5165
@@ -61,6 +75,14 @@ def __attrs_post_init__(self):
6175 self .metadata_initiated = True
6276
6377 def __setattr__ (self , name : str , value : Any ):
78+ """Side effects when setting a property
79+
80+ Properties starting with `_` or `metadata_` have no side effects.
81+
82+ For all other properties, the "metadata_assigned" and
83+ "metadata_defaults" are updated, depending on whether the value has been
84+ set explicitly, or implicitly as the default, respectively.
85+ """
6486 if not name .startswith (("_" , "metadata_" )):
6587 if self .metadata_initiated :
6688 if value is not None :
@@ -74,6 +96,7 @@ def __setattr__(self, name: str, value: Any):
7496 super ().__setattr__ (name , value )
7597
def __repr__(self) -> str:
    """Return the object's descriptor as a pretty-formatted string.

    The descriptor is obtained from `to_descriptor()` and formatted with
    `pprint.pformat`, keeping the keys in their original order.
    """
    descriptor = self.to_descriptor()
    return pprint.pformat(descriptor, sort_dicts=False)
78101
79102 @property
@@ -105,7 +128,15 @@ def handle_data(self, data: str):
105128 # Defined
106129
107130 def list_defined (self ) -> List [str ]:
131+ """Returns a list of all properties that have been defined.
132+
133+ TODO : the difference with metadata_assigned is that it lists values
134+ that are set in the class that are different from `metadata_defaults`.
135+ How is that possible, I thought metadata_defaults can only be set to
136+ the defaults ?
137+ """
108138 defined = list (self .metadata_assigned )
139+
109140 for name , default in self .metadata_defaults .items ():
110141 value = getattr (self , name , None )
111142 if isinstance (value , type ):
@@ -118,15 +149,27 @@ def add_defined(self, name: str) -> None:
118149 self .metadata_assigned .add (name )
119150
def has_defined(self, name: str) -> bool:
    """Return whether `name` is among the properties reported by `list_defined()`."""
    defined_names = self.list_defined()
    return name in defined_names
122154
def get_defined(self, name: str, *, default: Any = None) -> Any:
    """Return the value of property `name` if `has_defined(name)` is true.

    Otherwise return `default` (which is `None` unless provided).
    """
    return getattr(self, name) if self.has_defined(name) else default
128162
129163 def set_not_defined (self , name : str , value : Any , * , distinct : bool = False ) -> None :
164+ """If no property with "name" has already been assigned, then assign
165+ "value" to this property, but without the side effects of setting an
166+ attribute (see
167+ `__setattr__`, in particular, "has_defined(name)" will still return
168+ False after definition).
169+
170+ Setting `distinct=True` will prevent overwriting an already set value
171+ (including default values or values set with this method already)
172+ """
130173 if not self .has_defined (name ) and value is not None :
131174 if distinct and getattr (self , name , None ) == value :
132175 return
@@ -141,6 +184,11 @@ def validate_descriptor(
141184 * ,
142185 basepath : Optional [str ] = None ,
143186 ) -> Report :
187+ """Validate a descriptor
188+
189+ To do so, it tries to convert a descriptor into a class instance, and
190+ report errors it has encountered (if any)
191+ """
144192 errors = []
145193 timer = helpers .Timer ()
146194 try :
@@ -165,36 +213,63 @@ def from_descriptor(
165213 allow_invalid : bool = False ,
166214 ** options : Any ,
167215 ) -> Self :
216+ """Constructs an instance from a descriptor.
217+
218+ This method will identify the most specialized Class and instantiate
219+ it given information provided in the descriptor.
220+
221+ "descriptor" can be provided as a path to a descriptor file. The path
222+ can be relative to a base path provided as an option with the name
223+ "basepath".
224+
225+ If `allow_invalid = True`, the class creation will try to continue
226+ despite the descriptor having errors.
227+ """
168228 descriptor_path = None
229+
169230 if isinstance (descriptor , str ):
170231 descriptor_path = descriptor
171232 basepath = options .pop ("basepath" , None )
172233 descriptor = helpers .join_basepath (descriptor , basepath )
173234 if "basepath" in inspect .signature (cls .__init__ ).parameters :
174235 options ["basepath" ] = helpers .parse_basepath (descriptor )
236+
175237 descriptor = cls .metadata_retrieve (descriptor )
176- # TODO: remove in next version
238+
239+ # TODO: remove in v6
177240 # Transform with a base class in case the type is not available
178241 cls .metadata_transform (descriptor )
179- type = descriptor .get ("type" )
242+
243+ expected_type = descriptor .get ("type" )
244+
245+ # python class "type" property, if present, has precedence over descriptor type
180246 class_type = vars (cls ).get ("type" )
181247 if isinstance (class_type , str ):
182- type = class_type
183- Class = cls .metadata_select_class (type )
248+ expected_type = class_type
249+
250+ # Get the most specialized class associated with the expected_type
251+ # (defaults to the current class if `expected_type` is `None`)
252+ Class = cls .metadata_select_class (expected_type )
184253 Error = Class .metadata_Error or platform .frictionless_errors .MetadataError
254+
185255 Class .metadata_transform (descriptor )
186256 errors = list (Class .metadata_validate (descriptor ))
257+
187258 if not allow_invalid :
188259 if errors :
189260 error = Error (note = "descriptor is not valid" )
190261 raise FrictionlessException (error , reasons = errors )
262+
191263 metadata = Class .metadata_import (descriptor , ** helpers .remove_non_values (options ))
192264 if descriptor_path :
193265 metadata .metadata_descriptor_path = descriptor_path
194266 metadata .metadata_descriptor_initial = metadata .to_descriptor ()
195267 return metadata # type: ignore
196268
197269 def to_descriptor (self , * , validate : bool = False ) -> types .IDescriptor :
270+ """Return a descriptor associated to the class instance.
271+ If `validate = True`, the descriptor will additionally be validated.
272+ """
198273 descriptor = self .metadata_export ()
199274 if validate :
200275 Error = self .metadata_Error or platform .frictionless_errors .MetadataError
@@ -277,16 +352,56 @@ def to_markdown(self, path: Optional[str] = None, table: bool = False) -> str:
277352 metadata_type : ClassVar [str ]
278353 metadata_Error : ClassVar [Optional [Type [Error ]]] = None
279354 metadata_profile : ClassVar [Dict [str , Any ]] = {}
355+ """A JSON Schema like dictionary that defines the expected format of the descriptor"""
356+
280357 metadata_profile_patch : ClassVar [Dict [str , Any ]] = {}
358+ """Change to the expected format of the descriptor
359+
360+ This will usually be used by child classes to amend and build upon the
361+ descriptor of their parent.
362+ """
363+
281364 metadata_profile_merged : ClassVar [Dict [str , Any ]] = {}
365+ """Provides a consolidated definition of the descriptor, taking into
366+ account a `metadata_profile` and all `metadata_profile_patch`es that
367+ apply.
368+ """
369+
282370 metadata_initiated : bool = False
371+
283372 metadata_assigned : Set [str ] = set ()
373+ """Set of all names of properties to which a value (different from None)
374+ has been _explicitly_ assigned (including with explicit arguments at
375+ object initialization)"""
376+
284377 metadata_defaults : Dict [str , Any ] = {}
378+ """Names and values of properties that have not been
379+ explicitly set, and that have been set to a default value instead"""
380+
285381 metadata_descriptor_path : Optional [str ] = None
382+ """Descriptor file path
383+ If applicable, i.e. if a class has been instantiated with
384+ a descriptor read from a file
385+ """
386+
286387 metadata_descriptor_initial : Optional [types .IDescriptor ] = None
388+ """Descriptor used for class instantiation
389+ If applicable, i.e. if a class has been instantiated with
390+ a descriptor
391+ """
287392
288393 @classmethod
289394 def metadata_select_class (cls , type : Optional [str ]) -> Type [Metadata ]:
395+ """Allows to specify a more specialized class for the "type" given as
396+ input
397+
398+ When a class can be dispatched into several different more
399+ specialized classes, this function makes the link between the type and
400+ the class.
401+
402+ Otherwise, "type" is expected to be None, and the current class is
403+ returned.
404+ """
290405 if type :
291406 note = f'unsupported type for "{ cls .metadata_type } ": { type } '
292407 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
@@ -299,6 +414,11 @@ def metadata_select_property_class(cls, name: str) -> Optional[Type[Metadata]]:
299414
300415 @classmethod
301416 def metadata_ensure_profile (cls ):
417+ """Consolidates `metadata_profile` and `metadata_profile_patch`es
418+
419+ All patches are applied, in order from parent to child, in case of
420+ multiple successive inheritance.
421+ """
302422 if not cls .__dict__ .get ("metadata_profile_merged" , None ):
303423 cls .metadata_profile_merged = cls .metadata_profile
304424 for subcls in reversed (cls .mro ()):
@@ -310,14 +430,32 @@ def metadata_ensure_profile(cls):
310430
311431 @classmethod
312432 def metadata_retrieve (
313- cls , descriptor : Union [types .IDescriptor , str ], * , size : Optional [int ] = None
433+ cls ,
434+ descriptor : Union [types .IDescriptor , str , Path ],
435+ * ,
436+ size : Optional [int ] = None ,
314437 ) -> types .IDescriptor :
438+ """Copy or fetch the "descriptor" as a dictionary.
439+
440+ If "descriptor" is a string or Path, then it is interpreted as a
441+ (possibly remote) path to a descriptor file.
442+
443+ The content of the file is expected to be in JSON format, except if
444+ the filename has an explicit `.yaml` extension.
445+
446+ """
315447 try :
316448 if isinstance (descriptor , Mapping ):
317449 return deepcopy (descriptor )
450+
451+ # Types are tested explicitly,
452+ # for providing feedback to users that do not comply with
453+ # the function signature and provide a wrong type
318454 if isinstance (descriptor , (str , Path )): # type: ignore
455+ # descriptor is read from (possibly remote) file
319456 if isinstance (descriptor , Path ):
320457 descriptor = str (descriptor )
458+
321459 if helpers .is_remote_path (descriptor ):
322460 session = platform .frictionless .system .http_session
323461 response = session .get (descriptor , stream = True )
@@ -328,20 +466,36 @@ def metadata_retrieve(
328466 else :
329467 with open (descriptor , encoding = "utf-8" ) as file :
330468 content = file .read (size )
469+
331470 if descriptor .endswith (".yaml" ):
332471 metadata = platform .yaml .safe_load (io .StringIO (content ))
333472 else :
334473 metadata = json .loads (content )
474+
335475 assert isinstance (metadata , dict )
336476 return metadata # type: ignore
477+
337478 raise TypeError ("descriptor type is not supported" )
479+
338480 except Exception as exception :
339481 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
340482 note = f'cannot retrieve metadata "{ descriptor } " because "{ exception } "'
341483 raise FrictionlessException (Error (note = note )) from exception
342484
343485 @classmethod
344486 def metadata_transform (cls , descriptor : types .IDescriptor ):
487+ """Transform the descriptor in place before deserializing it into a python class
488+ instance.
489+
490+ The transformation applies recursively to any property handled with
491+ `metadata_select_property_class(name)`.
492+
493+ The actual transformation steps are defined by child classes, which must call
494+ `super().metadata_transform` to ensure recursive transformation.
495+
496+ This can be used for instance for backward compatibility, converting
497+ former descriptors into new ones.
498+ """
345499 profile = cls .metadata_ensure_profile ()
346500 for name in profile .get ("properties" , {}):
347501 value = descriptor .get (name )
@@ -364,12 +518,22 @@ def metadata_validate(
364518 profile : Optional [Union [types .IDescriptor , str ]] = None ,
365519 error_class : Optional [Type [Error ]] = None ,
366520 ) -> Generator [Error , None , None ]:
521+ """Validates a descriptor according to a profile
522+
523+ A **profile** is a JSON Schema dict that sets expectations on the format
524+ of the descriptor.
525+
526+ The profile to validate against can be set explicitly ("profile" parameter),
527+ otherwise it defaults to the class profile.
528+ """
367529 Error = error_class
368530 if not Error :
369531 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
532+
370533 profile = profile or cls .metadata_ensure_profile ()
371534 if isinstance (profile , str ):
372535 profile = cls .metadata_retrieve (profile )
536+
373537 validator_class = platform .jsonschema .validators .validator_for (profile ) # type: ignore
374538 validator = validator_class (profile ) # type: ignore
375539 for error in validator .iter_errors (descriptor ): # type: ignore
@@ -379,6 +543,7 @@ def metadata_validate(
379543 if metadata_path :
380544 note = f"{ note } at property '{ metadata_path } '"
381545 yield Error (note = note )
546+
382547 for name in profile .get ("properties" , {}):
383548 value = descriptor .get (name )
384549 Class = cls .metadata_select_property_class (name )
@@ -400,6 +565,10 @@ def metadata_import(
400565 with_basepath : bool = False ,
401566 ** options : Any ,
402567 ) -> Self :
568+ """Deserialization of a descriptor to a class instance
569+
570+ The deserialization and serialization must be lossless
571+ """
403572 merged_options = {}
404573 profile = cls .metadata_ensure_profile ()
405574 basepath = options .pop ("basepath" , None )
@@ -431,6 +600,10 @@ def metadata_import(
431600 return metadata
432601
433602 def metadata_export (self , * , exclude : List [str ] = []) -> types .IDescriptor :
603+ """Serialize class instance to descriptor
604+
605+ The deserialization and serialization must be lossless
606+ """
434607 descriptor = {}
435608 profile = self .metadata_ensure_profile ()
436609 for name in profile .get ("properties" , {}):
0 commit comments